diff --git a/README.md b/README.md
index 28d96db59a06b898223de6a30182b69458533a9b..79b65ee5001a2f003f119f2ddb907ec7ad033b03 100644
--- a/README.md
+++ b/README.md
@@ -44,8 +44,9 @@ The tool currently supports various popular image matching algorithms, namely:
| Algorithm | Supported | Conference/Journal | Year | GitHub Link |
|------------------|-----------|--------------------|------|-------------|
-| LiftFeat | ✅ | ICRA | 2025 | [Link](https://github.com/lyp-deeplearning/LiftFeat) |
+| RIPE | ✅ | ICCV | 2025 | [Link](https://github.com/fraunhoferhhi/RIPE) |
| RDD | ✅ | CVPR | 2025 | [Link](https://github.com/xtcpete/rdd) |
+| LiftFeat | ✅ | ICRA | 2025 | [Link](https://github.com/lyp-deeplearning/LiftFeat) |
| DaD | ✅ | ARXIV | 2025 | [Link](https://github.com/Parskatt/dad) |
| MINIMA | ✅ | ARXIV | 2024 | [Link](https://github.com/LSXI7/MINIMA) |
| XoFTR | ✅ | CVPR | 2024 | [Link](https://github.com/OnderT/XoFTR) |
diff --git a/config/config.yaml b/config/config.yaml
index 3f8ccc9d1d78920534bb270a162c2b8553100ab9..cee2cfad63273ca659a84d4405eb4a5623313646 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -267,6 +267,17 @@ matcher_zoo:
paper: https://arxiv.org/abs/2505.0342
project: null
display: true
+ ripe(+mnn):
+ matcher: NN-mutual
+ feature: ripe
+ dense: false
+ info:
+ name: RIPE #display name
+ source: "ICCV 2025"
+ github: https://github.com/fraunhoferhhi/RIPE
+ paper: https://arxiv.org/abs/2507.04839
+ project: https://fraunhoferhhi.github.io/RIPE
+ display: true
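+ # `feature: ripe` refers to the "ripe" configuration added in
+ # imcui/hloc/extract_features.py; `matcher: NN-mutual` performs mutual
+ # nearest-neighbour matching on the extracted descriptors, as in the other
+ # sparse entries of this zoo.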
rdd(sparse):
matcher: NN-mutual
feature: rdd
@@ -274,7 +285,7 @@ matcher_zoo:
info:
name: RDD(sparse) #dispaly name
source: "CVPR 2025"
- github: hhttps://github.com/xtcpete/rdd
+ github: https://github.com/xtcpete/rdd
paper: https://arxiv.org/abs/2505.08013
project: https://xtcpete.github.io/rdd
display: true
@@ -284,7 +295,7 @@ matcher_zoo:
info:
name: RDD(dense) #dispaly name
source: "CVPR 2025"
- github: hhttps://github.com/xtcpete/rdd
+ github: https://github.com/xtcpete/rdd
paper: https://arxiv.org/abs/2505.08013
project: https://xtcpete.github.io/rdd
display: true
diff --git a/imcui/hloc/extract_features.py b/imcui/hloc/extract_features.py
index 199584749281cdcd41be0f121f414146bf2187a5..213b217b01cdc948a27802c185d72e98328e9d40 100644
--- a/imcui/hloc/extract_features.py
+++ b/imcui/hloc/extract_features.py
@@ -236,6 +236,17 @@ confs = {
"resize_max": 1600,
},
},
+ "ripe": {
+ "output": "feats-ripe-n2048-r1600",
+ "model": {
+ "name": "ripe",
+ "max_keypoints": 2048,
+ },
+ "preprocessing": {
+ "grayscale": False,
+ "resize_max": 1600,
+ },
+ },
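+ # A minimal usage sketch for the "ripe" configuration above, assuming the
+ # standard hloc entry point bundled in this repo:
+ #
+ #   from imcui.hloc import extract_features
+ #   conf = extract_features.confs["ripe"]
+ #   # extract_features.main(conf, image_dir, export_dir) resolves
+ #   # conf["model"]["name"] to imcui/hloc/extractors/ripe.py and writes the
+ #   # keypoints, descriptors and scores to the "feats-ripe-n2048-r1600" file.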
"aliked-n16-rot": {
"output": "feats-aliked-n16-rot",
"model": {
diff --git a/imcui/hloc/extractors/ripe.py b/imcui/hloc/extractors/ripe.py
new file mode 100644
index 0000000000000000000000000000000000000000..1746a4f54d924f5802565b6ee6d42b0bf6b9f821
--- /dev/null
+++ b/imcui/hloc/extractors/ripe.py
@@ -0,0 +1,46 @@
+import sys
+from pathlib import Path
+from ..utils.base_model import BaseModel
+from .. import logger, MODEL_REPO_ID
+
+ripe_path = Path(__file__).parent / "../../third_party/RIPE"
+sys.path.append(str(ripe_path))
+
+from ripe import vgg_hyper
+
+
+class RIPE(BaseModel):
+ default_conf = {
+ "keypoint_threshold": 0.05,
+ "max_keypoints": 5000,
+ "model_name": "weights_ripe.pth",
+ }
+
+ required_inputs = ["image"]
+
+ def _init(self, conf):
+ logger.info("Loading RIPE model...")
+ model_path = self._download_model(
+ repo_id=MODEL_REPO_ID,
+ filename="{}/{}".format(Path(__file__).stem, self.conf["model_name"]),
+ )
+ self.net = vgg_hyper(Path(model_path))
+ logger.info("Loading RIPE model done!")
+
+ def _forward(self, data):
+ # The detection threshold follows the upstream RIPE demo default (0.5);
+ # the number of requested keypoints comes from the configuration.
+ keypoints, descriptors, scores = self.net.detectAndCompute(
+ data["image"], threshold=0.5, top_k=self.conf["max_keypoints"]
+ )
+
+ # Keep only the highest-scoring keypoints if more were returned.
+ if self.conf["max_keypoints"] < len(keypoints):
+ idxs = scores.argsort()[-self.conf["max_keypoints"] or None :]
+ keypoints = keypoints[idxs, :2]
+ descriptors = descriptors[idxs]
+ scores = scores[idxs]
+
+ pred = {
+ "keypoints": keypoints[None],
+ "descriptors": descriptors[None].permute(0, 2, 1),
+ "scores": scores[None],
+ }
+ return pred
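+
+
+# Rough standalone usage sketch (hypothetical shapes; in practice this module
+# is instantiated by hloc's extract_features via confs["ripe"]):
+#
+#   import torch
+#   model = RIPE({})                    # falls back to default_conf
+#   image = torch.rand(1, 3, 480, 640)  # RGB image, values in [0, 1]
+#   pred = model({"image": image})
+#   # pred["keypoints"]:   (1, N, 2) keypoint coordinates in pixels
+#   # pred["descriptors"]: (1, D, N) descriptors in hloc's channel-first layout
+#   # pred["scores"]:      (1, N) per-keypoint confidence scores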
diff --git a/imcui/third_party/RIPE/.gitignore b/imcui/third_party/RIPE/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..64d70e37740d19516c15265f36dd609ebb1f1ff4
--- /dev/null
+++ b/imcui/third_party/RIPE/.gitignore
@@ -0,0 +1,179 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+### VisualStudioCode
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+**/.vscode
+
+# JetBrains
+.idea/
+
+# ignore outputs
+/outputs/
+
+# ignore logs
+/logs/
+tmp.py
+.env
+
+# ignore pretrained pytorch models
+*.pth
+
+# ignore lightning_logs
+/lightning_logs/*
+
+# ignore built apptainer images
+*.sif
+
+# ignore the outputs server on the cluster
+/output/*
+# ignore .out files generated from the cluster
+*.out
+# ignore hparams_search folder
+/hparams_search_configs/*
+
+*.o
+*.pkl
+*.ninja_deps
+*.ninja_log
+*.ninja
+
+/misc/*
+/tmp/*
+/apptainer_env.box/*
+/scripts/tmp_build/*
+/checkpoints
+/pretrained_weights
+/results_supple_cvpr
+/ext_files
\ No newline at end of file
diff --git a/imcui/third_party/RIPE/LICENSE b/imcui/third_party/RIPE/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..42e084ab537ccb04abfc35cce690a57afa9c45a8
--- /dev/null
+++ b/imcui/third_party/RIPE/LICENSE
@@ -0,0 +1,35 @@
+Software Copyright License for Academic Use of RIPE, Version 2.0
+
+© Copyright (2025) Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
+
+1. INTRODUCTION
+
+RIPE which means any source code, object code or binary files provided by Fraunhofer excluding third party software and materials, is made available under this Software Copyright License.
+
+2. COPYRIGHT LICENSE
+
+Internal use of RIPE, in source and binary forms, with or without modification, is permitted without payment of copyright license fees for non-commercial purposes of evaluation, testing and academic research.
+
+No right or license, express or implied, is granted to any part of RIPE except and solely to the extent as expressly set forth herein. Any commercial use or exploitation of RIPE and/or any modifications thereto under this license are prohibited.
+
+For any other use of RIPE than permitted by this software copyright license You need another license from Fraunhofer. In such case please contact Fraunhofer under the CONTACT INFORMATION below.
+
+3. LIMITED PATENT LICENSE
+
+If Fraunhofer patents are implemented by RIPE their use is permitted for internal non-commercial purposes of evaluation, testing and academic research. No patent grant is provided for any other use, including but not limited to commercial use or exploitation.
+
+Fraunhofer provides no warranty of patent non-infringement with respect to RIPE.
+
+4. PLACE OF JURISDICTION
+
+German law shall apply to all disputes arising from the use of the licensed software. A court in Munich shall have local jurisdiction.
+
+5. DISCLAIMER
+
+RIPE is provided by Fraunhofer "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties of fitness for a particular purpose. IN NO EVENT SHALL FRAUNHOFER BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages, including but not limited to procurement of substitute goods or services; loss of use, data, or profits, or business interruption, however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence), arising in any way out of the use of the Fraunhofer Software, even if advised of the possibility of such damage.
+
+6. CONTACT INFORMATION
+
+Fraunhofer-Institut für Nachrichtentechnik, Heinrich-Hertz-Institut, HHI
+Einsteinufer 37, 10587 Berlin, Germany
+info@hhi.fraunhofer.de
diff --git a/imcui/third_party/RIPE/LICENSE_DALF_DISK b/imcui/third_party/RIPE/LICENSE_DALF_DISK
new file mode 100644
index 0000000000000000000000000000000000000000..989e2c59e973a05cfbfe9de678b7f2af777b0713
--- /dev/null
+++ b/imcui/third_party/RIPE/LICENSE_DALF_DISK
@@ -0,0 +1,201 @@
+Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/imcui/third_party/RIPE/README.md b/imcui/third_party/RIPE/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9ee5b3d1a78e2a0c484f7b721712d37a32b0b739
--- /dev/null
+++ b/imcui/third_party/RIPE/README.md
@@ -0,0 +1,367 @@
+# RIPE: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction
+
+🌊🌺 ICCV 2025 🌺🌊
+
+Johannes Künzel · Anna Hilsmann · Peter Eisert
+
+[Arxiv](https://arxiv.org/abs/2507.04839) | [Project Page](https://fraunhoferhhi.github.io/RIPE) | [🤗Demo🤗](https://huggingface.co/spaces/JohannesK14/RIPE)
+
+RIPE demonstrates that keypoint detection and description can be learned from image pairs only - no depth, no pose, no artificial augmentation required.
+
+## Setup
+
+💡**Alternative**💡 Install nothing locally and try our Hugging Face demo: [🤗Demo🤗](https://huggingface.co/spaces/JohannesK14/RIPE)
+
+1. Install mamba by following the instructions given here: [Mamba Installation](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html)
+
+2. Create a new environment with:
+```bash
+mamba env create -f conda_env.yml
+mamba activate ripe-env
+```
+
+## How to use
+
+Use the following snippet, or just check [demo.py](demo.py):
+
+```python
+import cv2
+import kornia.feature as KF
+import kornia.geometry as KG
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from torchvision.io import decode_image
+
+from ripe import vgg_hyper
+from ripe.utils.utils import cv2_matches_from_kornia, resize_image, to_cv_kpts
+
+dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+model = vgg_hyper().to(dev)
+model.eval()
+
+image1 = resize_image(decode_image("assets/all_souls_000013.jpg").float().to(dev) / 255.0)
+image2 = resize_image(decode_image("assets/all_souls_000055.jpg").float().to(dev) / 255.0)
+
+kpts_1, desc_1, score_1 = model.detectAndCompute(image1, threshold=0.5, top_k=2048)
+kpts_2, desc_2, score_2 = model.detectAndCompute(image2, threshold=0.5, top_k=2048)
+
+matcher = KF.DescriptorMatcher("mnn") # threshold is not used with mnn
+match_dists, match_idxs = matcher(desc_1, desc_2)
+
+matched_pts_1 = kpts_1[match_idxs[:, 0]]
+matched_pts_2 = kpts_2[match_idxs[:, 1]]
+
+H, mask = KG.ransac.RANSAC(model_type="fundamental", inl_th=1.0)(matched_pts_1, matched_pts_2)
+matchesMask = mask.int().ravel().tolist()
+
+result_ransac = cv2.drawMatches(
+ (image1.cpu().permute(1, 2, 0).numpy() * 255.0).astype(np.uint8),
+ to_cv_kpts(kpts_1, score_1),
+ (image2.cpu().permute(1, 2, 0).numpy() * 255.0).astype(np.uint8),
+ to_cv_kpts(kpts_2, score_2),
+ cv2_matches_from_kornia(match_dists, match_idxs),
+ None,
+ matchColor=(0, 255, 0),
+ matchesMask=matchesMask,
+ # matchesMask=None, # without RANSAC filtering
+ singlePointColor=(0, 0, 255),
+ flags=cv2.DrawMatchesFlags_DEFAULT,
+)
+
+plt.imshow(result_ransac)
+plt.axis("off")
+plt.tight_layout()
+
+plt.show()
+# plt.savefig("result_ransac.png")
+```
+
+## Reproduce the results
+
+### MegaDepth 1500 & HPatches
+
+1. Download and install [Glue Factory](https://github.com/cvg/glue-factory)
+2. Add this repo as a submodule to Glue Factory:
+```bash
+cd glue-factory
+git submodule add https://github.com/fraunhoferhhi/RIPE.git thirdparty/ripe
+```
+3. Create the new file ripe.py under gluefactory/models/extractors/ with the following content:
+
+
+**ripe.py**
+
+ ```python
+ import sys
+ from pathlib import Path
+
+ import torch
+ import torchvision.transforms as transforms
+
+ from ..base_model import BaseModel
+
+ ripe_path = Path(__file__).parent / "../../../thirdparty/ripe"
+
+ print(f"RIPE Path: {ripe_path.resolve()}")
+ # check if the path exists
+ if not ripe_path.exists():
+ raise RuntimeError(f"RIPE path not found: {ripe_path}")
+
+ sys.path.append(str(ripe_path))
+
+ from ripe import vgg_hyper
+
+
+ class RIPE(BaseModel):
+ default_conf = {
+ "name": "RIPE",
+ "model_path": None,
+ "chunk": 4,
+ "dense_outputs": False,
+ "threshold": 1.0,
+ "top_k": 2048,
+ }
+
+ required_data_keys = ["image"]
+
+ # Initialize the line matcher
+ def _init(self, conf):
+ self.normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ self.model = vgg_hyper(model_path=conf.model_path)
+ self.model.eval()
+
+ self.set_initialized()
+
+ def _forward(self, data):
+ image = data["image"]
+
+ keypoints, scores, descriptors = [], [], []
+
+ chunk = self.conf.chunk
+
+ for i in range(0, image.shape[0], chunk):
+ if self.conf.dense_outputs:
+ raise NotImplementedError("Dense outputs are not supported")
+ else:
+ im = image[: min(image.shape[0], i + chunk)]
+ im = self.normalizer(im)
+
+ H, W = im.shape[-2:]
+
+ kpt, desc, score = self.model.detectAndCompute(
+ im,
+ threshold=self.conf.threshold,
+ top_k=self.conf.top_k,
+ )
+ keypoints += [kpt.squeeze(0)]
+ scores += [score.squeeze(0)]
+ descriptors += [desc.squeeze(0)]
+
+ del kpt
+ del desc
+ del score
+
+ keypoints = torch.stack(keypoints, 0)
+ scores = torch.stack(scores, 0)
+ descriptors = torch.stack(descriptors, 0)
+
+ pred = {
+ # "keypoints": keypoints.to(image) + 0.5,
+ "keypoints": keypoints.to(image),
+ "keypoint_scores": scores.to(image),
+ "descriptors": descriptors.to(image),
+ }
+
+ return pred
+
+ def loss(self, pred, data):
+ raise NotImplementedError
+ ```
+
+
+
+4. Create ripe+NN.yaml in gluefactory/configs with the following content:
+
+
+**ripe+NN.yaml**
+
+ ```yaml
+ model:
+ name: two_view_pipeline
+ extractor:
+ name: extractors.ripe
+ threshold: 1.0
+ top_k: 2048
+ matcher:
+ name: matchers.nearest_neighbor_matcher
+ benchmarks:
+ megadepth1500:
+ data:
+ preprocessing:
+ side: long
+ resize: 1600
+ eval:
+ estimator: poselib
+ ransac_th: 0.5
+ hpatches:
+ eval:
+ estimator: poselib
+ ransac_th: 0.5
+ model:
+ extractor:
+ top_k: 1024 # overwrite config above
+ ```
+
+5. Run the MegaDepth 1500 evaluation script:
+
+```bash
+python -m gluefactory.eval.megadepth1500 --conf ripe+NN # for MegaDepth 1500
+```
+
+Should result in:
+
+```bash
+'rel_pose_error@10°': 0.6834,
+'rel_pose_error@20°': 0.7803,
+'rel_pose_error@5°': 0.5511,
+```
+
+6. Run the HPatches evaluation script:
+
+```bash
+python -m gluefactory.eval.hpatches --conf ripe+NN # for HPatches
+```
+
+Should result in:
+
+```bash
+'H_error_ransac@1px': 0.3793,
+'H_error_ransac@3px': 0.5893,
+'H_error_ransac@5px': 0.692,
+```
+
+
+
+## Training
+
+1. Create a .env file with the following content:
+```bash
+OUTPUT_DIR="/output"
+DATA_DIR="/data"
+```
+
+2. Download the required datasets:
+
+
+**DISK Megadepth subset**
+
+ To download the dataset used by [DISK](https://github.com/cvlab-epfl/disk) execute the following commands:
+
+ ```bash
+ cd data
+ bash download_disk_data.sh
+ ```
+
+
+
+
+**Tokyo 24/7**
+
+ - ⚠️**Optional**⚠️: Only if you are interested in the model used in Section 4.6 of the paper!
+ - Download the Tokyo 24/7 query images from here: [Tokyo 24/7 Query Images V3](http://www.ok.ctrl.titech.ac.jp/~torii/project/247/download/247query_v3.zip) from the official [website](http://www.ok.ctrl.titech.ac.jp/~torii/project/247/_).
+ - Extract them into data/Tokyo_Query_V3
+
+ ```bash
+ Tokyo_Query_V3/
+ ├── 00001.csv
+ ├── 00001.jpg
+ ├── 00002.csv
+ ├── 00002.jpg
+ ├── ...
+ ├── 01125.csv
+ ├── 01125.jpg
+ ├── Readme.txt
+ └── Readme.txt~
+ ```
+
+
+
+
+**ACDC**
+
+ - ⚠️**Optional**⚠️: Only if you are interested in the model used in Section 6.1 (supplementary) of the paper!
+ - Download the RGB images from here: [ACDC RGB Images](https://acdc.vision.ee.ethz.ch/rgb_anon_trainvaltest.zip)
+ - Extract them into data/ACDC
+
+ ```bash
+ ACDC/
+ rgb_anon
+ ├── fog
+ │ ├── test
+ │ │ ├── GOPR0475
+ │ │ ├── GOPR0477
+ │ ├── test_ref
+ │ │ ├── GOPR0475
+ │ │ ├── GOPR0477
+ │ ├── train
+ │ │ ├── GOPR0475
+ │ │ ├── GOPR0476
+ ├── night
+ ```
+
+
+
+3. Run the training script:
+
+```bash
+python ripe/train.py --config-name train project_name=train name=reproduce wandb_mode=offline
+```
+
+You can also easily switch settings from the command line, e.g. to additionally train on the Tokyo 24/7 dataset:
+```bash
+python ripe/train.py --config-name train project_name=train name=reproduce wandb_mode=offline data=megadepth+tokyo
+```
+
+## Acknowledgements
+
+Our code is partly based on the following repositories:
+- [DALF](https://github.com/verlab/DALF_CVPR_2023) Apache License 2.0
+- [DeDoDe](https://github.com/Parskatt/DeDoDe) MIT License
+- [DISK](https://github.com/cvlab-epfl/disk) Apache License 2.0
+
+Our evaluation was based on the following repositories:
+- [Glue Factory](https://github.com/cvg/glue-factory)
+- [hloc](https://github.com/cvg/Hierarchical-Localization)
+
+We would like to thank the authors of these repositories for their great work and for making their code available.
+
+Our project webpage is based on the [Academic Project Page Template](https://github.com/eliahuhorwitz/Academic-project-page-template) by Eliahu Horwitz.
+
+## BibTeX Citation
+
+```
+
+@article{ripe2025,
+year = {2025},
+title = {{RIPE: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction}},
+author = {Künzel, Johannes and Hilsmann, Anna and Eisert, Peter},
+journal = {arXiv},
+eprint = {2507.04839},
+}
+```
diff --git a/imcui/third_party/RIPE/app.py b/imcui/third_party/RIPE/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..7201bded055efd1321dae63fb0db94110263a137
--- /dev/null
+++ b/imcui/third_party/RIPE/app.py
@@ -0,0 +1,272 @@
+# This is a small gradio interface to access our RIPE keypoint extractor.
+# You can either upload two images or use one of the example image pairs.
+
+import os
+
+import gradio as gr
+from PIL import Image
+
+from ripe import vgg_hyper
+
+SEED = 32000
+os.environ["PYTHONHASHSEED"] = str(SEED)
+
+import random
+from pathlib import Path
+
+import numpy as np
+import torch
+
+torch.manual_seed(SEED)
+np.random.seed(SEED)
+random.seed(SEED)
+import cv2
+import kornia.feature as KF
+import kornia.geometry as KG
+
+from ripe.utils.utils import cv2_matches_from_kornia, to_cv_kpts
+
+MIN_SIZE = 512
+MAX_SIZE = 768
+
+description_text = """
+🌊🌺 ICCV 2025 🌺🌊
+
+Johannes Künzel · Anna Hilsmann · Peter Eisert
+
+### This demo showcases our new keypoint extractor model, RIPE (Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction).
+
+### RIPE is trained without requiring pose or depth supervision or artificial augmentations. By leveraging reinforcement learning, it learns to extract keypoints solely based on whether an image pair depicts the same scene or not.
+
+### For more detailed information, please refer to our [paper](link to be added).
+
+The demo code extracts the top 2048 keypoints from the two input images. It uses the mutual nearest neighbor (MNN) descriptor matcher from kornia to find matches between the two images.
+If the number of matches is greater than 8, it applies RANSAC to filter out outliers based on the inlier threshold provided by the user.
+Images are resized so that their longer side fits within the demo's size limits while maintaining the aspect ratio.
+
+
+"""
+
+path_weights = Path(
+ "/media/jwkuenzel/work/projects/CVG_Reinforced_Keypoints/output/train/ablation_iccv/inlier_threshold/1571243/2025-02-19/14-00-10_789013/model_inlier_threshold_best.pth"
+)
+
+model = vgg_hyper(path_weights)
+
+
+def get_new_image_size(image, min_size=1600, max_size=2048):
+ """
+ Get a new size for the image that is scaled to fit between min_size and max_size while maintaining the aspect ratio.
+
+ Args:
+ image (PIL.Image): Input image.
+ min_size (int): Minimum allowed size for width and height.
+ max_size (int): Maximum allowed size for width and height.
+
+ Returns:
+ tuple: New size (width, height) for the image.
+ """
+ width, height = image.size
+
+ aspect_ratio = width / height
+ if width > height:
+ new_width = max(min_size, min(max_size, width))
+ new_height = int(new_width / aspect_ratio)
+ else:
+ new_height = max(min_size, min(max_size, height))
+ new_width = int(new_height * aspect_ratio)
+
+ new_size = (new_width, new_height)
+
+ return new_size
+
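+# Worked example for get_new_image_size (values chosen for illustration):
+# a 4000x3000 input with min_size=512 and max_size=768 is mapped to
+# (768, 576): the longer side is clamped into [min_size, max_size] and the
+# shorter side follows from the aspect ratio.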
+
+def extract_keypoints(image1, image2, inl_th):
+ """
+ Extract keypoints from two input images using the RIPE model.
+
+ Args:
+ image1 (PIL.Image): First input image.
+ image2 (PIL.Image): Second input image.
+ inl_th (float): RANSAC inlier threshold.
+
+ Returns:
+ dict: A dictionary containing keypoints and matches.
+ """
+ log_text = "Extracting keypoints and matches with RIPE\n"
+
+ log_text += f"Image 1 size: {image1.size}\n"
+ log_text += f"Image 2 size: {image2.size}\n"
+
+ # resize so that the longer side lies within [MIN_SIZE, MAX_SIZE]
+ new_size = get_new_image_size(image1, min_size=MIN_SIZE, max_size=MAX_SIZE)
+ image1 = image1.resize(new_size)
+
+ new_size = get_new_image_size(image2, min_size=MIN_SIZE, max_size=MAX_SIZE)
+ image2 = image2.resize(new_size)
+
+ log_text += f"Resized Image 1 size: {image1.size}\n"
+ log_text += f"Resized Image 2 size: {image2.size}\n"
+
+ dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(dev)
+
+ image1 = image1.convert("RGB")
+ image2 = image2.convert("RGB")
+
+ image1_original = image1.copy()
+ image2_original = image2.copy()
+
+ # convert PIL images to numpy arrays
+ image1_original = np.array(image1_original)
+ image2_original = np.array(image2_original)
+
+ # convert PIL images to tensors
+ image1 = torch.tensor(np.array(image1)).permute(2, 0, 1).float() / 255.0
+ image2 = torch.tensor(np.array(image2)).permute(2, 0, 1).float() / 255.0
+
+ image1 = image1.to(dev).unsqueeze(0) # Add batch dimension
+ image2 = image2.to(dev).unsqueeze(0) # Add batch dimension
+
+ kpts_1, desc_1, score_1 = model.detectAndCompute(image1, threshold=0.5, top_k=2048)
+ kpts_2, desc_2, score_2 = model.detectAndCompute(image2, threshold=0.5, top_k=2048)
+
+ log_text += f"Number of keypoints in image 1: {kpts_1.shape[0]}\n"
+ log_text += f"Number of keypoints in image 2: {kpts_2.shape[0]}\n"
+
+ matcher = KF.DescriptorMatcher("mnn") # threshold is not used with mnn
+ match_dists, match_idxs = matcher(desc_1, desc_2)
+
+ log_text += f"Number of MNN matches: {match_idxs.shape[0]}\n"
+
+ cv2_matches = cv2_matches_from_kornia(match_dists, match_idxs)
+
+ do_ransac = match_idxs.shape[0] > 8
+
+ if do_ransac:
+ matched_pts_1 = kpts_1[match_idxs[:, 0]]
+ matched_pts_2 = kpts_2[match_idxs[:, 1]]
+
+ H, mask = KG.ransac.RANSAC(model_type="fundamental", inl_th=inl_th)(matched_pts_1, matched_pts_2)
+ matchesMask = mask.int().ravel().tolist()
+
+ log_text += f"RANSAC found {mask.sum().item()} inliers out of {mask.shape[0]} matches with an inlier threshold of {inl_th}.\n"
+ else:
+ log_text += "Not enough matches for RANSAC, skipping RANSAC step.\n"
+
+ kpts_1 = to_cv_kpts(kpts_1, score_1)
+ kpts_2 = to_cv_kpts(kpts_2, score_2)
+
+ keypoints_raw_1 = cv2.drawKeypoints(image1_original, kpts_1, image1_original, color=(0, 255, 0))
+ keypoints_raw_2 = cv2.drawKeypoints(image2_original, kpts_2, image2_original, color=(0, 255, 0))
+
+ # pad height smaller image to match the height of the larger image
+ if keypoints_raw_1.shape[0] < keypoints_raw_2.shape[0]:
+ pad_height = keypoints_raw_2.shape[0] - keypoints_raw_1.shape[0]
+ keypoints_raw_1 = np.pad(
+ keypoints_raw_1, ((0, pad_height), (0, 0), (0, 0)), mode="constant", constant_values=255
+ )
+ elif keypoints_raw_1.shape[0] > keypoints_raw_2.shape[0]:
+ pad_height = keypoints_raw_1.shape[0] - keypoints_raw_2.shape[0]
+ keypoints_raw_2 = np.pad(
+ keypoints_raw_2, ((0, pad_height), (0, 0), (0, 0)), mode="constant", constant_values=255
+ )
+
+ # concatenate keypoints images horizontally
+ keypoints_raw = np.concatenate((keypoints_raw_1, keypoints_raw_2), axis=1)
+ keypoints_raw_pil = Image.fromarray(keypoints_raw)
+
+ result_raw = cv2.drawMatches(
+ image1_original,
+ kpts_1,
+ image2_original,
+ kpts_2,
+ cv2_matches,
+ None,
+ matchColor=(0, 255, 0),
+ matchesMask=None,
+ # matchesMask=None,
+ flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
+ )
+
+ if not do_ransac:
+ result_ransac = None
+ else:
+ result_ransac = cv2.drawMatches(
+ image1_original,
+ kpts_1,
+ image2_original,
+ kpts_2,
+ cv2_matches,
+ None,
+ matchColor=(0, 255, 0),
+ matchesMask=matchesMask,
+ singlePointColor=(0, 0, 255),
+ flags=cv2.DrawMatchesFlags_DEFAULT,
+ )
+
+ # result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB) # Convert BGR to RGB for display
+
+ # convert to PIL Image
+ result_raw_pil = Image.fromarray(result_raw)
+ if result_ransac is not None:
+ result_ransac_pil = Image.fromarray(result_ransac)
+ else:
+ result_ransac_pil = None
+
+ return log_text, result_ransac_pil, result_raw_pil, keypoints_raw_pil
+
+
+demo = gr.Interface(
+ fn=extract_keypoints,
+ inputs=[
+ gr.Image(type="pil", label="Image 1"),
+ gr.Image(type="pil", label="Image 2"),
+ gr.Slider(
+ minimum=0.1,
+ maximum=3.0,
+ step=0.1,
+ value=0.5,
+ label="RANSAC inlier threshold",
+ info="Threshold for RANSAC inlier detection. Lower values may yield fewer inliers but more robust matches.",
+ ),
+ ],
+ outputs=[
+ gr.Textbox(type="text", label="Log"),
+ gr.Image(type="pil", label="Keypoints and Matches (RANSAC)"),
+ gr.Image(type="pil", label="Keypoints and Matches"),
+ gr.Image(type="pil", label="Keypoint Detection Results"),
+ ],
+ title="RIPE: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction",
+ description=description_text,
+ examples=[
+ [
+ "assets_gradio/all_souls_000013.jpg",
+ "assets_gradio/all_souls_000055.jpg",
+ ],
+ [
+ "assets_gradio/167170681_0e5c42fd21_o.jpg",
+ "assets_gradio/170804731_6bf4fbecd4_o.jpg",
+ ],
+ [
+ "assets_gradio/4171014767_0fe879b783_o.jpg",
+ "assets_gradio/4174108353_20422632d6_o.jpg",
+ ],
+ ],
+ flagging_mode="never",
+ theme="default",
+)
+demo.launch()
diff --git a/imcui/third_party/RIPE/assets/all_souls_000013.jpg b/imcui/third_party/RIPE/assets/all_souls_000013.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..68e5cfc4ccccc8a7cc245b20a12627c79eeac829
--- /dev/null
+++ b/imcui/third_party/RIPE/assets/all_souls_000013.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60fd73963102f86baf08325631f8912db34acba7fb46cc9a41b818099276187e
+size 439703
diff --git a/imcui/third_party/RIPE/assets/all_souls_000055.jpg b/imcui/third_party/RIPE/assets/all_souls_000055.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cbdba46346d96fd2b3996d1522553977d7af85ad
--- /dev/null
+++ b/imcui/third_party/RIPE/assets/all_souls_000055.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e11c06ae78103c2dbb90737e2bab6aa47f2000948ece5bfe9a1e7eb1cacac53a
+size 367553
diff --git a/imcui/third_party/RIPE/assets/teaser_image.png b/imcui/third_party/RIPE/assets/teaser_image.png
new file mode 100644
index 0000000000000000000000000000000000000000..ee494209011428c5253eb210296ec9f2bc302180
--- /dev/null
+++ b/imcui/third_party/RIPE/assets/teaser_image.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd636ae0eb42927792cba0f04243c2ec65226a6f5e1287ab4ee015353b01c208
+size 1686066
diff --git a/imcui/third_party/RIPE/conda_env.yml b/imcui/third_party/RIPE/conda_env.yml
new file mode 100644
index 0000000000000000000000000000000000000000..92764961642003d2cb76d1be7c2e5c799d6fa466
--- /dev/null
+++ b/imcui/third_party/RIPE/conda_env.yml
@@ -0,0 +1,26 @@
+name: ripe-env
+channels:
+ - conda-forge
+dependencies:
+ - python
+ - cmake
+ - eigen # for poselib
+ - pytorch=2.6=*cuda*
+ - torchvision
+ - pip
+ # others
+ - pudb # debugger
+ - pip:
+ - lightning>=2.0.0
+ - setuptools
+ - poselib @ git+https://github.com/PoseLib/PoseLib.git@56d158f744d3561b0b70174e6d8ca9a7fc9bd9c1
+ - hydra-core
+ - opencv-python
+ - torchmetrics
+ - pyrootutils # standardizing the project root setup
+ - rich
+ - matplotlib
+ - kornia
+ - numpy
+ - wandb
+ - h5py
diff --git a/imcui/third_party/RIPE/conf/backbones/resnet.yaml b/imcui/third_party/RIPE/conf/backbones/resnet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5919338c20a7ecd8d9374d7a70d2d30cec885984
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/backbones/resnet.yaml
@@ -0,0 +1,6 @@
+_target_: ripe.models.backbones.resnet.ResNet
+nchannels: 3
+pretrained: True
+use_instance_norm: False
+mode: dect
+num_layers: 4
diff --git a/imcui/third_party/RIPE/conf/backbones/vgg.yaml b/imcui/third_party/RIPE/conf/backbones/vgg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b878b5bccbbe624b0b2618b87604f1d3acd7947a
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/backbones/vgg.yaml
@@ -0,0 +1,5 @@
+_target_: ripe.models.backbones.vgg.VGG
+nchannels: 3
+pretrained: True
+use_instance_norm: False
+mode: dect
diff --git a/imcui/third_party/RIPE/conf/data/disk_megadepth.yaml b/imcui/third_party/RIPE/conf/data/disk_megadepth.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8db2c624494d15e4411977aaf7dfdd35bee1cc10
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/data/disk_megadepth.yaml
@@ -0,0 +1,12 @@
+_target_: ripe.data.datasets.disk_megadepth.DISK_Megadepth
+root: ${oc.env:DATA_DIR}/disk-data
+stage: train
+max_scene_size: 10000
+transforms:
+ _target_: ripe.data.data_transforms.Compose
+ transforms:
+ - _target_: ripe.data.data_transforms.Normalize
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+ target_size_longer_side: 560
diff --git a/imcui/third_party/RIPE/conf/data/megadepth+acdc.yaml b/imcui/third_party/RIPE/conf/data/megadepth+acdc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e652cf4f10801decb01166c1e46715dafff80cb
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/data/megadepth+acdc.yaml
@@ -0,0 +1,33 @@
+_target_: ripe.data.datasets.dataset_combinator.DatasetCombinator
+mode: custom
+weights:
+ - 0.2
+ - 0.8
+datasets:
+ - _target_: ripe.data.datasets.acdc.ACDC
+ root: ${oc.env:DATA_DIR}/ACDC
+ stage: train
+ condition: all
+ transforms:
+ _target_: ripe.data.data_transforms.Compose
+ transforms:
+ - _target_: ripe.data.data_transforms.Normalize
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ - _target_: ripe.data.data_transforms.Crop # to remove the car hood from some images
+ crop_height: 896
+ crop_width: 1920
+ - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+ target_size_longer_side: 560
+ - _target_: ripe.data.datasets.disk_megadepth.DISK_Megadepth
+ root: ${oc.env:DATA_DIR}/disk-data
+ stage: train
+ max_scene_size: 10000
+ transforms:
+ _target_: ripe.data.data_transforms.Compose
+ transforms:
+ - _target_: ripe.data.data_transforms.Normalize
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+ target_size_longer_side: 560
diff --git a/imcui/third_party/RIPE/conf/data/megadepth+tokyo.yaml b/imcui/third_party/RIPE/conf/data/megadepth+tokyo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..50415c0a33caf4be1695b8d3431aab130fdc5ec4
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/data/megadepth+tokyo.yaml
@@ -0,0 +1,29 @@
+_target_: ripe.data.datasets.dataset_combinator.DatasetCombinator
+mode: custom
+weights:
+ - 0.2
+ - 0.8
+datasets:
+ - _target_: ripe.data.datasets.tokyo_query_v3.TokyoQueryV3
+ root: ${oc.env:DATA_DIR}/Tokyo_Query_V3
+ stage: train
+ transforms:
+ _target_: ripe.data.data_transforms.Compose
+ transforms:
+ - _target_: ripe.data.data_transforms.Normalize
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+ target_size_longer_side: 560 # like DeDoDe
+ - _target_: ripe.data.datasets.disk_megadepth.DISK_Megadepth
+ root: ${oc.env:DATA_DIR}/disk-data
+ stage: train
+ max_scene_size: 10000
+ transforms:
+ _target_: ripe.data.data_transforms.Compose
+ transforms:
+ - _target_: ripe.data.data_transforms.Normalize
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+ target_size_longer_side: 560
diff --git a/imcui/third_party/RIPE/conf/descriptor_loss/contrastive_loss.yaml b/imcui/third_party/RIPE/conf/descriptor_loss/contrastive_loss.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8d143773b044f30f74021676a6e4bbaf911ab5d
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/descriptor_loss/contrastive_loss.yaml
@@ -0,0 +1,3 @@
+_target_: ripe.losses.contrastive_loss.ContrastiveLoss
+pos_margin: 0.2
+neg_margin: 0.2
diff --git a/imcui/third_party/RIPE/conf/inl_th/constant.yaml b/imcui/third_party/RIPE/conf/inl_th/constant.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..974f70b2790b507c8bbc9108f4ef175d6f7d8ed8
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/inl_th/constant.yaml
@@ -0,0 +1,2 @@
+_target_: ripe.scheduler.constant.ConstantScheduler
+value: 1.0
diff --git a/imcui/third_party/RIPE/conf/inl_th/exp_decay.yaml b/imcui/third_party/RIPE/conf/inl_th/exp_decay.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3e37680d88068f60a786a7818c094958a6e12200
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/inl_th/exp_decay.yaml
@@ -0,0 +1,4 @@
+_target_: ripe.scheduler.expDecay.ExpDecay
+a: 2.5
+b: 0.0005
+c: 0.5
diff --git a/imcui/third_party/RIPE/conf/matcher/concurrent_mnn_poselib.yaml b/imcui/third_party/RIPE/conf/matcher/concurrent_mnn_poselib.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..36b927a94ae53c885a6f1b532416a541e692e14e
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/matcher/concurrent_mnn_poselib.yaml
@@ -0,0 +1,8 @@
+_target_: ripe.matcher.concurrent_matcher.ConcurrentMatcher
+min_num_matches: 8
+matcher:
+ _target_: kornia.feature.DescriptorMatcher
+ match_mode: "mnn"
+ th: 0.8
+robust_estimator:
+ _target_: ripe.matcher.pose_estimator_poselib.PoseLibRelativePoseEstimator
diff --git a/imcui/third_party/RIPE/conf/train.yaml b/imcui/third_party/RIPE/conf/train.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa5ec32b3a236cb2246a511219281b45dc4583a8
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/train.yaml
@@ -0,0 +1,89 @@
+defaults:
+ - data: disk_megadepth # megadepth+acdc or megadepth+tokyo
+ - backbones: vgg
+ - upsampler: hypercolumn_features # interpolate_sparse2D
+ - matcher: concurrent_mnn_poselib
+ - descriptor_loss: contrastive_loss # none to deactivate
+ - inl_th: constant # exp_decay
+ - _self_
+
+project_name: ???
+name: ???
+
+hydra:
+ run:
+ dir: ${oc.env:OUTPUT_DIR}/${project_name}/${name}/${oc.env:SLURM_JOB_ID}/${now:%Y-%m-%d}/${now:%H-%M-%S}
+output_dir: ${hydra:runtime.output_dir}
+
+num_gpus: 1
+# precision: "32-true"
+precision: "bf16-mixed" # numerically more stable
+# precision: "16-mixed"
+
+log_interval: 50 # log every N steps/ batches
+wandb_mode: online
+val_interval: 2000
+conf_inference:
+ threshold: 0.5
+ top_k: 2048
+
+desc_loss_weight: 5.0 # 0.0 to deactivate, also deactivates 1x1 conv
+
+num_workers: 8
+batch_size: 6
+
+transformation_model: fundamental
+
+network:
+ _target_: ripe.models.ripe.RIPE
+ _partial_: true
+ window_size: 8
+ non_linearity_dect:
+ _target_: torch.nn.Identity
+ # _target_: torch.nn.ReLU
+ desc_shares:
+ null
+ # - 64
+ # - 64
+ # - 64
+ # - 64
+
+lr: 0.001 # 0.001 makes it somewhat unstable
+fp_penalty: -1e-7 # -1e-7
+kp_penalty: -7e-7 # -7e-7
+num_grad_accs: 4
+reward_type: inlier # inlier_ratio , inlier+inlier_ratio
+no_filtering_negatives: False
+descriptor_dim: 256
+
+lr_scheduler:
+ _partial_: true
+ _target_: ripe.scheduler.linearLR.StepLinearLR
+ num_steps: ${num_steps}
+ initial_lr: ${lr}
+ final_lr: 1e-6
+
+use_whitening: false
+
+selected_only: False
+
+padding_filter_mode: ignore
+# padding_filter_mode: punish
+
+num_steps: 80000
+
+alpha_scheduler: # 1.0 after 1/3 of the steps
+ _target_: ripe.scheduler.linear_with_plateaus.LinearWithPlateaus
+ start_val: 0.0
+ end_val: 1.0
+ steps_total: ${num_steps}
+ rel_length_start_plateau: 0.0
+ rel_length_end_plateu: 0.6666666
+
+beta_scheduler: # linear increase over all steps
+ _target_: ripe.scheduler.linear_with_plateaus.LinearWithPlateaus
+ start_val: 0.0
+ end_val: 1.0
+ steps_total: ${num_steps}
+ rel_length_start_plateau: 0.0
+ rel_length_end_plateu: 0.0
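+
+# Any key above can be overridden from the command line via Hydra, e.g.
+# (mirroring the README):
+#   python ripe/train.py --config-name train project_name=train name=reproduce wandb_mode=offline data=megadepth+tokyo
+# which swaps the `data` default for conf/data/megadepth+tokyo.yaml.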
diff --git a/imcui/third_party/RIPE/conf/upsampler/hypercolumn_features.yaml b/imcui/third_party/RIPE/conf/upsampler/hypercolumn_features.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fcb00de582420a3559c4cdabfe117efbd96b7ce7
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/upsampler/hypercolumn_features.yaml
@@ -0,0 +1,2 @@
+_target_: ripe.models.upsampler.hypercolumn_features.HyperColumnFeatures
+mode: bilinear
diff --git a/imcui/third_party/RIPE/conf/upsampler/interpolate_sparse2D.yaml b/imcui/third_party/RIPE/conf/upsampler/interpolate_sparse2D.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63625bcbeb7e9b492751b7924b7215e439c8f22c
--- /dev/null
+++ b/imcui/third_party/RIPE/conf/upsampler/interpolate_sparse2D.yaml
@@ -0,0 +1 @@
+_target_: ripe.models.upsampler.interpolate_sparse2d.InterpolateSparse2d
diff --git a/imcui/third_party/RIPE/data/download_disk_data.sh b/imcui/third_party/RIPE/data/download_disk_data.sh
new file mode 100644
index 0000000000000000000000000000000000000000..843d190dac553060744eebedae26bca9e8045d86
--- /dev/null
+++ b/imcui/third_party/RIPE/data/download_disk_data.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# get the data (zipped)
+# wget -r https://datasets.epfl.ch/disk-data/index.html
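+# NOTE: the `cd` below assumes the recursive wget above (or an equivalent
+# manual download) has already been run, so that datasets.epfl.ch/disk-data
+# exists in the current directory.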
+
+cd datasets.epfl.ch/disk-data;
+
+# check for MD5 match
+# md5sum -c md5sum.txt;
+# if [ $? ]; then
+# echo "MD5 mismatch (corrupt download)";
+# return 1;
+# fi
+
+# create a crude progress counter
+ITER=1;
+TOTAL=138;
+# unzip test scenes
+cd imw2020-val/scenes;
+for SCENE_TAR in *.tar.gz; do
+ echo "Unzipping $SCENE_TAR ($ITER / $TOTAL)";
+ tar -xz --strip-components=3 -f $SCENE_TAR;
+ rm $SCENE_TAR;
+ ITER=$(($ITER+1));
+done
+
+# unzip megadepth scenes
+cd ../../megadepth/scenes;
+for SCENE_TAR in *.tar; do
+ echo "Unzipping $SCENE_TAR ($ITER / $TOTAL)";
+ tar -x --strip-components=3 -f $SCENE_TAR;
+ rm $SCENE_TAR;
+ ITER=$(($ITER+1));
+done
+
+cd ../../../../
+
+mv datasets.epfl.ch/disk-data ./
+rm -rf datasets.epfl.ch
+
+
+
+
diff --git a/imcui/third_party/RIPE/demo.py b/imcui/third_party/RIPE/demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..83733fe62cc56525e82f9747c11bc3415b6305e9
--- /dev/null
+++ b/imcui/third_party/RIPE/demo.py
@@ -0,0 +1,51 @@
+import cv2
+import kornia.feature as KF
+import kornia.geometry as KG
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from torchvision.io import decode_image
+
+from ripe import vgg_hyper
+from ripe.utils.utils import cv2_matches_from_kornia, resize_image, to_cv_kpts
+
+dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+model = vgg_hyper().to(dev)
+model.eval()
+
+image1 = resize_image(decode_image("assets/all_souls_000013.jpg").float().to(dev) / 255.0)
+image2 = resize_image(decode_image("assets/all_souls_000055.jpg").float().to(dev) / 255.0)
+
+kpts_1, desc_1, score_1 = model.detectAndCompute(image1, threshold=0.5, top_k=2048)
+kpts_2, desc_2, score_2 = model.detectAndCompute(image2, threshold=0.5, top_k=2048)
+
+matcher = KF.DescriptorMatcher("mnn") # threshold is not used with mnn
+match_dists, match_idxs = matcher(desc_1, desc_2)
+
+matched_pts_1 = kpts_1[match_idxs[:, 0]]
+matched_pts_2 = kpts_2[match_idxs[:, 1]]
+
+H, mask = KG.ransac.RANSAC(model_type="fundamental", inl_th=1.0)(matched_pts_1, matched_pts_2)
+matchesMask = mask.int().ravel().tolist()
+
+result_ransac = cv2.drawMatches(
+ (image1.cpu().permute(1, 2, 0).numpy() * 255.0).astype(np.uint8),
+ to_cv_kpts(kpts_1, score_1),
+ (image2.cpu().permute(1, 2, 0).numpy() * 255.0).astype(np.uint8),
+ to_cv_kpts(kpts_2, score_2),
+ cv2_matches_from_kornia(match_dists, match_idxs),
+ None,
+ matchColor=(0, 255, 0),
+ matchesMask=matchesMask,
+ # matchesMask=None, # without RANSAC filtering
+ singlePointColor=(0, 0, 255),
+ flags=cv2.DrawMatchesFlags_DEFAULT,
+)
+
+plt.imshow(result_ransac)
+plt.axis("off")
+plt.tight_layout()
+
+# plt.show()
+plt.savefig("result_ransac.png")
diff --git a/imcui/third_party/RIPE/ripe/__init__.py b/imcui/third_party/RIPE/ripe/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..793dc2d9a5e311689d859cfd521497de45cacd37
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/__init__.py
@@ -0,0 +1 @@
+from .model_zoo import vgg_hyper # noqa: F401
diff --git a/imcui/third_party/RIPE/ripe/benchmarks/imw_2020.py b/imcui/third_party/RIPE/ripe/benchmarks/imw_2020.py
new file mode 100644
index 0000000000000000000000000000000000000000..80dc5eafe4229634fd812f58d1e7192da3ec94ad
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/benchmarks/imw_2020.py
@@ -0,0 +1,320 @@
+import os
+from pathlib import Path
+
+import cv2
+import kornia.feature as KF
+import matplotlib.pyplot as plt
+import numpy as np
+import poselib
+import torch
+from tqdm import tqdm
+
+from ripe import utils
+from ripe.data.data_transforms import Compose, Normalize, Resize
+from ripe.data.datasets.disk_imw import DISK_IMW
+from ripe.utils.pose_error import AUCMetric, relative_pose_error
+from ripe.utils.utils import (
+ cv2_matches_from_kornia,
+ cv_resize_and_pad_to_shape,
+ to_cv_kpts,
+)
+
+log = utils.get_pylogger(__name__)
+
+
+class IMW_2020_Benchmark:
+ def __init__(
+ self,
+ use_predefined_subset: bool = True,
+ conf_inference=None,
+ edge_input_divisible_by=None,
+ ):
+ data_dir = os.getenv("DATA_DIR")
+ if data_dir is None:
+ raise ValueError("Environment variable DATA_DIR is not set.")
+ root_path = Path(data_dir) / "disk-data"
+
+ self.data = DISK_IMW(
+ str(
+ root_path
+ ), # Resize only to ensure that the input size is divisible by the value of edge_input_divisible_by
+ transforms=Compose(
+ [
+ Resize(None, edge_input_divisible_by),
+ Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+ ),
+ )
+ self.ids_subset = None
+ self.results = []
+ self.conf_inference = conf_inference
+
+ # fmt: off
+ if use_predefined_subset:
+ self.ids_subset = [4921, 3561, 3143, 6040, 802, 6828, 5338, 9275, 10764, 10085, 5124, 11355, 7, 10027, 2161, 4433, 6887, 3311, 10766,
+ 11451, 11433, 8539, 2581, 10300, 10562, 1723, 8803, 6275, 10140, 11487, 6238, 638, 8092, 9979, 201, 10394, 3414,
+ 9002, 7456, 2431, 632, 6589, 9265, 9889, 3139, 7890, 10619, 4899, 675, 176, 4309, 4814, 3833, 3519, 148, 4560, 10705,
+ 3744, 1441, 4049, 1791, 5106, 575, 1540, 1105, 6791, 1383, 9344, 501, 2504, 4335, 8992, 10970, 10786, 10405, 9317,
+ 5279, 1396, 5044, 9408, 11125, 10417, 7627, 7480, 1358, 7738, 5461, 10178, 9226, 8106, 2766, 6216, 4032, 7298, 259,
+ 3021, 2645, 8756, 7513, 3163, 2510, 6701, 6684, 3159, 9689, 7425, 6066, 1904, 6382, 3052, 777, 6277, 7409, 5997, 2987,
+ 11316, 2894, 4528, 1927, 10366, 8605, 2726, 1886, 2416, 2164, 3352, 2997, 6636, 6765, 5609, 3679, 76, 10956, 3612, 6699,
+ 1741, 8811, 3755, 1285, 9520, 2476, 3977, 370, 9823, 1834, 7551, 6227, 7303, 6399, 4758, 10713, 5050, 380, 11056, 7620,
+ 4826, 6090, 9011, 7523, 7355, 8021, 9801, 1801, 6522, 7138, 10017, 8732, 6402, 3116, 4031, 6088, 3975, 9841, 9082, 9412,
+ 5406, 217, 2385, 8791, 8361, 494, 4319, 5275, 3274, 335, 6731, 207, 10095, 3068, 5996, 3951, 2808, 5877, 6134, 7772, 10042,
+ 8574, 5501, 10885, 7871]
+ # self.ids_subset = self.ids_subset[:10]
+ # fmt: on
+
+ def evaluate_sample(self, model, sample, dev):
+ img_1 = sample["src_image"].unsqueeze(0).to(dev)
+ img_2 = sample["trg_image"].unsqueeze(0).to(dev)
+
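+ # The dataset transforms resized the images; these factors map detected
+ # keypoints back to the original resolution so that they are consistent with
+ # the original camera parameters during pose estimation.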
+ scale_h_1, scale_w_1 = (
+ sample["orig_size_src"][0] / img_1.shape[2],
+ sample["orig_size_src"][1] / img_1.shape[3],
+ )
+ scale_h_2, scale_w_2 = (
+ sample["orig_size_trg"][0] / img_2.shape[2],
+ sample["orig_size_trg"][1] / img_2.shape[3],
+ )
+
+ M = None
+ info = {}
+ kpts_1, desc_1, score_1 = None, None, None
+ kpts_2, desc_2, score_2 = None, None, None
+ match_dists, match_idxs = None, None
+
+ try:
+ kpts_1, desc_1, score_1 = model.detectAndCompute(img_1, **self.conf_inference)
+ kpts_2, desc_2, score_2 = model.detectAndCompute(img_2, **self.conf_inference)
+
+ if kpts_1.dim() == 3:
+ assert kpts_1.shape[0] == 1 and kpts_2.shape[0] == 1, "Batch size must be 1"
+
+ kpts_1, desc_1, score_1 = (
+ kpts_1.squeeze(0),
+ desc_1[0].squeeze(0),
+ score_1[0].squeeze(0),
+ )
+ kpts_2, desc_2, score_2 = (
+ kpts_2.squeeze(0),
+ desc_2[0].squeeze(0),
+ score_2[0].squeeze(0),
+ )
+
+ scale_1 = torch.tensor([scale_w_1, scale_h_1], dtype=torch.float).to(dev)
+ scale_2 = torch.tensor([scale_w_2, scale_h_2], dtype=torch.float).to(dev)
+
+ kpts_1 = kpts_1 * scale_1
+ kpts_2 = kpts_2 * scale_2
+
+ matcher = KF.DescriptorMatcher("mnn") # threshold is not used with mnn
+ match_dists, match_idxs = matcher(desc_1, desc_2)
+
+ matched_pts_1 = kpts_1[match_idxs[:, 0]]
+ matched_pts_2 = kpts_2[match_idxs[:, 1]]
+
+ camera_1 = sample["src_camera"]
+ camera_2 = sample["trg_camera"]
+
+ M, info = poselib.estimate_relative_pose(
+ matched_pts_1.cpu().numpy(),
+ matched_pts_2.cpu().numpy(),
+ camera_1.to_cameradict(),
+ camera_2.to_cameradict(),
+ {
+ "max_epipolar_error": 0.5,
+ },
+ {},
+ )
+ except RuntimeError as e:
+ if "No keypoints detected" in str(e):
+ pass
+ else:
+ raise e
+
+ success = M is not None
+ if success:
+ M = {
+ "R": torch.tensor(M.R, dtype=torch.float),
+ "t": torch.tensor(M.t, dtype=torch.float),
+ }
+ inl = info["inliers"]
+ else:
+ M = {
+ "R": torch.eye(3, dtype=torch.float),
+ "t": torch.zeros((3), dtype=torch.float),
+ }
+ inl = np.zeros((0,)).astype(bool)
+
+ t_err, r_err = relative_pose_error(sample["s2t_R"].cpu(), sample["s2t_T"].cpu(), M["R"], M["t"])
+
+ rel_pose_error = max(t_err.item(), r_err.item()) if success else np.inf
+ ransac_inl = np.sum(inl)
+ ransac_inl_ratio = np.mean(inl)
+
+ if success:
+ assert match_dists is not None and match_idxs is not None, "Matches must be computed"
+ cv_keypoints_src = to_cv_kpts(kpts_1, score_1)
+ cv_keypoints_trg = to_cv_kpts(kpts_2, score_2)
+ cv_matches = cv2_matches_from_kornia(match_dists, match_idxs)
+ cv_mask = [int(m) for m in inl]
+ else:
+ cv_keypoints_src, cv_keypoints_trg = [], []
+ cv_matches, cv_mask = [], []
+
+ estimation = {
+ "success": success,
+ "M_0to1": M,
+ "inliers": torch.tensor(inl).to(img_1),
+ "rel_pose_error": rel_pose_error,
+ "ransac_inl": ransac_inl,
+ "ransac_inl_ratio": ransac_inl_ratio,
+ "path_src_image": sample["src_path"],
+ "path_trg_image": sample["trg_path"],
+ "cv_keypoints_src": cv_keypoints_src,
+ "cv_keypoints_trg": cv_keypoints_trg,
+ "cv_matches": cv_matches,
+ "cv_mask": cv_mask,
+ }
+
+ return estimation
+
+ def evaluate(self, model, dev, progress_bar=False):
+ model.eval()
+
+ # reset results
+ self.results = []
+
+ for idx in tqdm(
+ self.ids_subset if self.ids_subset is not None else range(len(self.data)),
+ disable=not progress_bar,
+ ):
+ sample = self.data[idx]
+ self.results.append(self.evaluate_sample(model, sample, dev))
+
+ def get_auc(self, threshold=5, downsampled=False):
+ if len(self.results) == 0:
+ raise ValueError("No results to log. Run evaluate first.")
+
+ summary_results = self.calc_auc(downsampled=downsampled)
+
+ return summary_results[f"rel_pose_error@{threshold}°{'__original' if not downsampled else '__downsampled'}"]
+
+ def plot_results(self, num_samples=10, logger=None, step=None, downsampled=False):
+ if len(self.results) == 0:
+ raise ValueError("No results to plot. Run evaluate first.")
+
+ plot_data = []
+
+ for result in self.results[:num_samples]:
+ img1 = cv2.imread(result["path_src_image"])
+ img2 = cv2.imread(result["path_trg_image"])
+
+ # from BGR to RGB
+ img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
+ img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
+
+ plt_matches = cv2.drawMatches(
+ img1,
+ result["cv_keypoints_src"],
+ img2,
+ result["cv_keypoints_trg"],
+ result["cv_matches"],
+ None,
+ matchColor=None,
+ matchesMask=result["cv_mask"],
+ flags=cv2.DrawMatchesFlags_DEFAULT,
+ )
+ file_name = (
+ Path(result["path_src_image"]).parent.parent.name
+ + "_"
+ + Path(result["path_src_image"]).stem
+ + Path(result["path_trg_image"]).stem
+ + ("_downsampled" if downsampled else "")
+ + ".png"
+ )
+ # print rel_pose_error on image
+ plt_matches = cv2.putText(
+ plt_matches,
+ f"rel_pose_error: {result['rel_pose_error']:.2f} num_inliers: {result['ransac_inl']} inl_ratio: {result['ransac_inl_ratio']:.2f} num_matches: {len(result['cv_matches'])} num_keypoints: {len(result['cv_keypoints_src'])}/{len(result['cv_keypoints_trg'])}",
+ (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 1,
+ (0, 0, 0),
+ 2,
+ cv2.LINE_8,
+ )
+
+ plot_data.append({"file_name": file_name, "image": plt_matches})
+
+ if logger is None:
+ log.info("No logger provided. Using plt to plot results.")
+ for image in plot_data:
+ plt.imsave(
+ image["file_name"],
+ cv_resize_and_pad_to_shape(image["image"], (1024, 2048)),
+ )
+ plt.close()
+ else:
+ import wandb
+
+ log.info(f"Logging images to wandb with step={step}")
+ if not downsampled:
+ logger.log(
+ {
+ "examples": [
+ wandb.Image(cv_resize_and_pad_to_shape(image["image"], (1024, 2048))) for image in plot_data
+ ]
+ },
+ step=step,
+ )
+ else:
+ logger.log(
+ {
+ "examples_downsampled": [
+ wandb.Image(cv_resize_and_pad_to_shape(image["image"], (1024, 2048))) for image in plot_data
+ ]
+ },
+ step=step,
+ )
+
+ def log_results(self, logger=None, step=None, downsampled=False):
+ if len(self.results) == 0:
+ raise ValueError("No results to log. Run evaluate first.")
+
+ summary_results = self.calc_auc(downsampled=downsampled)
+
+ if logger is not None:
+ logger.log(summary_results, step=step)
+ else:
+ log.warning("No logger provided. Printing results instead.")
+            print(summary_results)
+
+ def print_results(self):
+ if len(self.results) == 0:
+ raise ValueError("No results to print. Run evaluate first.")
+
+ print(self.calc_auc())
+
+ def calc_auc(self, auc_thresholds=None, downsampled=False):
+ if auc_thresholds is None:
+ auc_thresholds = [5, 10, 20]
+ if not isinstance(auc_thresholds, list):
+ auc_thresholds = [auc_thresholds]
+
+ if len(self.results) == 0:
+ raise ValueError("No results to calculate auc. Run evaluate first.")
+
+ rel_pose_errors = [r["rel_pose_error"] for r in self.results]
+
+ pose_aucs = AUCMetric(auc_thresholds, rel_pose_errors).compute()
+ assert isinstance(pose_aucs, list) and len(pose_aucs) == len(auc_thresholds)
+
+ ext = "_downsampled" if downsampled else "_original"
+
+ summary = {}
+ for i, ath in enumerate(auc_thresholds):
+ summary[f"rel_pose_error@{ath}°_{ext}"] = pose_aucs[i]
+
+ return summary
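+
+
+# Minimal usage sketch (illustrative only): assumes the DATA_DIR environment variable points at
+# the folder containing "disk-data" and that `model` implements the detectAndCompute interface
+# used above.
+#
+#   benchmark = IMW_2020_Benchmark(conf_inference={})
+#   benchmark.evaluate(model, dev="cuda", progress_bar=True)
+#   benchmark.print_results()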
diff --git a/imcui/third_party/RIPE/ripe/data/__init__.py b/imcui/third_party/RIPE/ripe/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/imcui/third_party/RIPE/ripe/data/data_transforms.py b/imcui/third_party/RIPE/ripe/data/data_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffa197519e73c348c246809b0761c03fb48b9ec9
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/data/data_transforms.py
@@ -0,0 +1,204 @@
+import collections
+import collections.abc
+
+import kornia.geometry as KG
+import numpy as np
+import torch
+from torchvision.transforms import functional as TF
+
+
+class Compose:
+ """Composes several transforms together. The transforms are applied in the order they are passed in.
+ Args: transforms (list): A list of transforms to be applied.
+ """
+
+ def __init__(self, transforms):
+ self.transforms = transforms
+
+ def __call__(self, src, trg, src_mask, trg_mask, h):
+ for t in self.transforms:
+ src, trg, src_mask, trg_mask, h = t(src, trg, src_mask, trg_mask, h)
+
+ return src, trg, src_mask, trg_mask, h
+
+
+class Transform:
+ """Base class for all transforms. It provides a method to apply a transformation function to the input images and masks.
+ Args:
+ src (torch.Tensor): The source image tensor.
+ trg (torch.Tensor): The target image tensor.
+ src_mask (torch.Tensor): The source image mask tensor.
+ trg_mask (torch.Tensor): The target image mask tensor.
+ h (torch.Tensor): The homography matrix tensor.
+ Returns:
+ tuple: A tuple containing the transformed source image, the transformed target image, the transformed source mask,
+ the transformed target mask and the updated homography matrix.
+ """
+
+ def __init__(self):
+ pass
+
+    def apply_transform(self, src, trg, src_mask, trg_mask, h, transform_function):
+        src, trg, src_mask, trg_mask, h = transform_function(src, trg, src_mask, trg_mask, h)
+        return src, trg, src_mask, trg_mask, h
+
+
+class Normalize(Transform):
+ def __init__(self, mean, std):
+ self.mean = mean
+ self.std = std
+
+ def __call__(self, src, trg, src_mask, trg_mask, h):
+ return self.apply_transform(src, trg, src_mask, trg_mask, h, self.transform_function)
+
+ def transform_function(self, src, trg, src_mask, trg_mask, h):
+ src = TF.normalize(src, mean=self.mean, std=self.std)
+ trg = TF.normalize(trg, mean=self.mean, std=self.std)
+ return src, trg, src_mask, trg_mask, h
+
+
+class ResizeAndPadWithHomography(Transform):
+ def __init__(self, target_size_longer_side=768):
+ self.target_size = target_size_longer_side
+
+ def __call__(self, src, trg, src_mask, trg_mask, h):
+ return self.apply_transform(src, trg, src_mask, trg_mask, h, self.transform_function)
+
+ def transform_function(self, src, trg, src_mask, trg_mask, h):
+ src_w, src_h = src.shape[-1], src.shape[-2]
+ trg_w, trg_h = trg.shape[-1], trg.shape[-2]
+
+ # Resizing logic for both images
+ scale_src, new_src_w, new_src_h = self.compute_resize(src_w, src_h)
+ scale_trg, new_trg_w, new_trg_h = self.compute_resize(trg_w, trg_h)
+
+ # Resize both images
+ src_resized = TF.resize(src, [new_src_h, new_src_w])
+ trg_resized = TF.resize(trg, [new_trg_h, new_trg_w])
+
+ src_mask_resized = TF.resize(src_mask, [new_src_h, new_src_w])
+ trg_mask_resized = TF.resize(trg_mask, [new_trg_h, new_trg_w])
+
+        # Pad the resized images to be square (target_size x target_size)
+ src_padded, src_padding = self.apply_padding(src_resized, new_src_w, new_src_h)
+ trg_padded, trg_padding = self.apply_padding(trg_resized, new_trg_w, new_trg_h)
+
+ src_mask_padded, _ = self.apply_padding(src_mask_resized, new_src_w, new_src_h)
+ trg_mask_padded, _ = self.apply_padding(trg_mask_resized, new_trg_w, new_trg_h)
+
+ # Update the homography matrix
+ h = self.update_homography(h, scale_src, src_padding, scale_trg, trg_padding)
+
+ return src_padded, trg_padded, src_mask_padded, trg_mask_padded, h
+
+ def compute_resize(self, w, h):
+ if w > h:
+ scale = self.target_size / w
+ new_w = self.target_size
+ new_h = int(h * scale)
+ else:
+ scale = self.target_size / h
+ new_h = self.target_size
+ new_w = int(w * scale)
+ return scale, new_w, new_h
+
+ def apply_padding(self, img, new_w, new_h):
+ pad_w = (self.target_size - new_w) // 2
+ pad_h = (self.target_size - new_h) // 2
+ padding = [
+ pad_w,
+ pad_h,
+ self.target_size - new_w - pad_w,
+ self.target_size - new_h - pad_h,
+ ]
+ img_padded = TF.pad(img, padding, fill=0) # Zero-pad
+ return img_padded, padding
+
+ def update_homography(self, h, scale_src, padding_src, scale_trg, padding_trg):
+ # Create the scaling matrices
+ scale_matrix_src = np.array([[scale_src, 0, 0], [0, scale_src, 0], [0, 0, 1]])
+ scale_matrix_trg = np.array([[scale_trg, 0, 0], [0, scale_trg, 0], [0, 0, 1]])
+
+ # Create the padding translation matrices
+ pad_matrix_src = np.array([[1, 0, padding_src[0]], [0, 1, padding_src[1]], [0, 0, 1]])
+ pad_matrix_trg = np.array([[1, 0, -padding_trg[0]], [0, 1, -padding_trg[1]], [0, 0, 1]])
+
+ # Update the homography: apply scaling and translation
+ h_updated = (
+ pad_matrix_trg
+ @ scale_matrix_trg
+ @ h.numpy()
+ @ np.linalg.inv(scale_matrix_src)
+ @ np.linalg.inv(pad_matrix_src)
+ )
+
+ return torch.from_numpy(h_updated).float()
+
+
+class Resize(Transform):
+ def __init__(self, output_size, edge_divisible_by=None, side="long", antialias=True):
+ self.output_size = output_size
+ self.edge_divisible_by = edge_divisible_by
+ self.side = side
+ self.antialias = antialias
+
+ def __call__(self, src, trg, src_mask, trg_mask, h):
+ return self.apply_transform(src, trg, src_mask, trg_mask, h, self.transform_function)
+
+ def transform_function(self, src, trg, src_mask, trg_mask, h):
+ new_size_src = self.get_new_image_size(src)
+ new_size_trg = self.get_new_image_size(trg)
+
+ src, T_src = self.resize(src, new_size_src)
+ trg, T_trg = self.resize(trg, new_size_trg)
+
+ src_mask, _ = self.resize(src_mask, new_size_src)
+ trg_mask, _ = self.resize(trg_mask, new_size_trg)
+
+ h = torch.from_numpy(T_trg @ h.numpy() @ T_src).float()
+
+ return src, trg, src_mask, trg_mask, h
+
+ def resize(self, img, size):
+ h, w = img.shape[-2:]
+
+ img = KG.transform.resize(
+ img,
+ size,
+ side=self.side,
+ antialias=self.antialias,
+ align_corners=None,
+ interpolation="bilinear",
+ )
+
+ scale = torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
+ T = np.diag([scale[0].item(), scale[1].item(), 1])
+
+ return img, T
+
+ def get_new_image_size(self, img):
+ h, w = img.shape[-2:]
+
+ if isinstance(self.output_size, collections.abc.Iterable):
+ assert len(self.output_size) == 2
+ return tuple(self.output_size)
+ if self.output_size is None: # keep the original size, but possibly make it divisible by edge_divisible_by
+ size = (h, w)
+ else:
+ side_size = self.output_size
+ aspect_ratio = w / h
+ if self.side not in ("short", "long", "vert", "horz"):
+ raise ValueError(f"side can be one of 'short', 'long', 'vert', and 'horz'. Got '{self.side}'")
+ if self.side == "vert":
+ size = side_size, int(side_size * aspect_ratio)
+ elif self.side == "horz":
+ size = int(side_size / aspect_ratio), side_size
+ elif (self.side == "short") ^ (aspect_ratio < 1.0):
+ size = side_size, int(side_size * aspect_ratio)
+ else:
+ size = int(side_size / aspect_ratio), side_size
+
+ if self.edge_divisible_by is not None:
+ df = self.edge_divisible_by
+ size = list(map(lambda x: int(x // df * df), size))
+ return size
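+
+
+# Usage sketch (illustrative): the evaluation code combines a Resize that only makes both image
+# edges divisible by a given value with ImageNet normalization; 16 is an example value for
+# edge_divisible_by.
+#
+#   transforms = Compose([
+#       Resize(None, 16),
+#       Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+#   ])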
diff --git a/imcui/third_party/RIPE/ripe/data/datasets/__init__.py b/imcui/third_party/RIPE/ripe/data/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/imcui/third_party/RIPE/ripe/data/datasets/acdc.py b/imcui/third_party/RIPE/ripe/data/datasets/acdc.py
new file mode 100644
index 0000000000000000000000000000000000000000..55f75283f3909a684cbd049ba0f8e419a3579b92
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/data/datasets/acdc.py
@@ -0,0 +1,154 @@
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional
+
+import torch
+from torch.utils.data import Dataset
+from torchvision.io import read_image
+
+from ripe import utils
+from ripe.data.data_transforms import Compose
+from ripe.utils.utils import get_other_random_id
+
+log = utils.get_pylogger(__name__)
+
+
+class ACDC(Dataset):
+ def __init__(
+ self,
+ root: Path,
+ stage: str = "train",
+ condition: str = "rain",
+ transforms: Optional[Callable] = None,
+ positive_only: bool = False,
+ ) -> None:
+ self.root = root
+ self.stage = stage
+ self.condition = condition
+ self.transforms = transforms
+ self.positive_only = positive_only
+
+ if isinstance(self.root, str):
+ self.root = Path(self.root)
+
+ if not self.root.exists():
+ raise FileNotFoundError(f"Dataset not found at {self.root}")
+
+ if transforms is None:
+ self.transforms = Compose([])
+ else:
+ self.transforms = transforms
+
+ if self.stage not in ["train", "val", "test", "pred"]:
+ raise RuntimeError(
+ "Unknown option "
+ + self.stage
+ + " as training stage variable. Valid options: 'train', 'val', 'test' and 'pred'"
+ )
+
+ if self.stage == "pred": # prediction uses the test set
+ self.stage = "test"
+
+ if self.stage in ["val", "test", "pred"]:
+ self.positive_only = True
+ log.info(f"{self.stage} stage: Using only positive pairs!")
+
+ weather_conditions = ["fog", "night", "rain", "snow"]
+
+ if self.condition not in weather_conditions + ["all"]:
+ raise RuntimeError(
+ "Unknown option "
+ + self.condition
+ + " as weather condition variable. Valid options: 'fog', 'night', 'rain', 'snow' and 'all'"
+ )
+
+ self.weather_condition_query = weather_conditions if self.condition == "all" else [self.condition]
+
+ self._read_sample_files()
+
+ if positive_only:
+ log.warning("Using only positive pairs!")
+ log.info(f"Found {len(self.src_images)} source images and {len(self.trg_images)} target images.")
+
+ def _read_sample_files(self):
+ file_name_pattern_ref = "_ref_anon.png"
+ file_name_pattern = "_rgb_anon.png"
+
+ self.trg_images = []
+ self.src_images = []
+
+ for weather_condition in self.weather_condition_query:
+ rgb_files = sorted(
+ list(self.root.glob("rgb_anon/" + weather_condition + "/" + self.stage + "/**/*" + file_name_pattern)),
+ key=lambda i: i.stem[:21],
+ )
+
+ src_images = sorted(
+ list(
+ self.root.glob(
+ "rgb_anon/" + weather_condition + "/" + self.stage + "_ref" + "/**/*" + file_name_pattern_ref
+ )
+ ),
+ key=lambda i: i.stem[:21],
+ )
+
+ self.trg_images += rgb_files
+ self.src_images += src_images
+
+ def __len__(self) -> int:
+ if self.positive_only:
+ return len(self.trg_images)
+ return 2 * len(self.trg_images)
+
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
+ sample: Any = {}
+
+ positive_sample = (idx % 2 == 0) or (self.positive_only)
+ if not self.positive_only:
+ idx = idx // 2
+
+ sample["label"] = positive_sample
+
+ if positive_sample:
+ sample["src_path"] = str(self.src_images[idx])
+ sample["trg_path"] = str(self.trg_images[idx])
+
+ assert self.src_images[idx].stem[:21] == self.trg_images[idx].stem[:21], (
+ f"Source and target image mismatch: {self.src_images[idx]} vs {self.trg_images[idx]}"
+ )
+
+ src_img = read_image(sample["src_path"])
+ trg_img = read_image(sample["trg_path"])
+
+ homography = torch.eye(3, dtype=torch.float32)
+ else:
+ sample["src_path"] = str(self.src_images[idx])
+ idx_other = get_other_random_id(idx, len(self) // 2)
+ sample["trg_path"] = str(self.trg_images[idx_other])
+
+ assert self.src_images[idx].stem[:21] != self.trg_images[idx_other].stem[:21], (
+ f"Source and target image match for negative sample: {self.src_images[idx]} vs {self.trg_images[idx_other]}"
+ )
+
+ src_img = read_image(sample["src_path"])
+ trg_img = read_image(sample["trg_path"])
+
+ homography = torch.zeros((3, 3), dtype=torch.float32)
+
+ src_img = src_img / 255.0
+ trg_img = trg_img / 255.0
+
+ _, H, W = src_img.shape
+
+ src_mask = torch.ones((1, H, W), dtype=torch.uint8)
+ trg_mask = torch.ones((1, H, W), dtype=torch.uint8)
+
+ if self.transforms:
+ src_img, trg_img, src_mask, trg_mask, _ = self.transforms(src_img, trg_img, src_mask, trg_mask, homography)
+
+ sample["src_image"] = src_img
+ sample["trg_image"] = trg_img
+ sample["src_mask"] = src_mask.to(torch.bool)
+ sample["trg_mask"] = trg_mask.to(torch.bool)
+ sample["homography"] = homography
+
+ return sample
diff --git a/imcui/third_party/RIPE/ripe/data/datasets/dataset_combinator.py b/imcui/third_party/RIPE/ripe/data/datasets/dataset_combinator.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e3ec893fefa49d338e94b15563dcda675a6664c
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/data/datasets/dataset_combinator.py
@@ -0,0 +1,88 @@
+import torch
+
+from ripe import utils
+
+log = utils.get_pylogger(__name__)
+
+
+class DatasetCombinator:
+ """Combines multiple datasets into one. Length of the combined dataset is the length of the
+ longest dataset. Shorter datasets are looped over.
+
+ Args:
+ datasets: List of datasets to combine.
+        mode: How to sample from the datasets. Can be "uniform", "weighted" or "custom".
+            In "uniform" mode, each dataset is sampled with equal probability.
+            In "weighted" mode, each dataset is sampled with probability proportional to its length.
+            In "custom" mode, the sampling probabilities are taken from the `weights` argument.
+        weights: Optional list of per-dataset sampling probabilities (must sum to 1); only used in "custom" mode.
+ """
+
+ def __init__(self, datasets, mode="uniform", weights=None):
+ self.datasets = datasets
+
+ names_datasets = [type(ds).__name__ for ds in self.datasets]
+ self.lengths = [len(ds) for ds in datasets]
+
+ if mode == "weighted":
+ self.probs_datasets = [length / sum(self.lengths) for length in self.lengths]
+ elif mode == "uniform":
+ self.probs_datasets = [1 / len(self.datasets) for _ in self.datasets]
+ elif mode == "custom":
+ assert weights is not None, "Weights must be provided in custom mode"
+ assert len(weights) == len(datasets), "Number of weights must match number of datasets"
+ assert sum(weights) == 1.0, "Weights must sum to 1"
+ self.probs_datasets = weights
+ else:
+ raise ValueError(f"Unknown mode {mode}")
+
+ log.info("Got the following datasets: ")
+
+ for name, length, prob in zip(names_datasets, self.lengths, self.probs_datasets):
+ log.info(f"{name} with {length} samples and probability {prob}")
+ log.info(f"Total number of samples: {sum(self.lengths)}")
+
+ self.num_samples = max(self.lengths)
+
+ self.dataset_dist = torch.distributions.Categorical(probs=torch.tensor(self.probs_datasets))
+
+ def __len__(self):
+ return self.num_samples
+
+ def __getitem__(self, idx: int):
+ positive_sample = idx % 2 == 0
+
+ if positive_sample:
+ dataset_idx = self.dataset_dist.sample().item()
+
+ idx = torch.randint(0, self.lengths[dataset_idx], (1,)).item()
+ while idx % 2 == 1:
+ idx = torch.randint(0, self.lengths[dataset_idx], (1,)).item()
+
+ return self.datasets[dataset_idx][idx]
+ else:
+ dataset_idx_1 = self.dataset_dist.sample().item()
+ dataset_idx_2 = self.dataset_dist.sample().item()
+
+ if dataset_idx_1 == dataset_idx_2:
+ idx = torch.randint(0, self.lengths[dataset_idx_1], (1,)).item()
+ while idx % 2 == 0:
+ idx = torch.randint(0, self.lengths[dataset_idx_1], (1,)).item()
+ return self.datasets[dataset_idx_1][idx]
+
+ else:
+ idx_1 = torch.randint(0, self.lengths[dataset_idx_1], (1,)).item()
+ idx_2 = torch.randint(0, self.lengths[dataset_idx_2], (1,)).item()
+
+ sample_1 = self.datasets[dataset_idx_1][idx_1]
+ sample_2 = self.datasets[dataset_idx_2][idx_2]
+
+ sample = {
+ "label": False,
+ "src_path": sample_1["src_path"],
+ "trg_path": sample_2["trg_path"],
+ "src_image": sample_1["src_image"],
+ "trg_image": sample_2["trg_image"],
+ "src_mask": sample_1["src_mask"],
+ "trg_mask": sample_2["trg_mask"],
+ "homography": sample_2["homography"],
+ }
+ return sample
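+
+
+# Usage sketch (illustrative; the dataset instances are placeholders):
+#
+#   combined = DatasetCombinator([megadepth_ds, tokyo_ds], mode="custom", weights=[0.7, 0.3])
+#   sample = combined[0]  # even indices yield positive pairs, odd indices negative pairs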
diff --git a/imcui/third_party/RIPE/ripe/data/datasets/disk_imw.py b/imcui/third_party/RIPE/ripe/data/datasets/disk_imw.py
new file mode 100644
index 0000000000000000000000000000000000000000..597c11c107fb8cc909023f11715812ae5b5672b2
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/data/datasets/disk_imw.py
@@ -0,0 +1,160 @@
+import json
+import random
+from itertools import accumulate
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional, Tuple
+
+import torch
+from torch.utils.data import Dataset
+from torchvision.io import read_image
+
+from ripe import utils
+from ripe.data.data_transforms import Compose
+from ripe.utils.image_utils import Camera, cameras2F
+
+log = utils.get_pylogger(__name__)
+
+
+class DISK_IMW(Dataset):
+ def __init__(
+ self,
+ root: str,
+ stage: str = "val",
+ # condition: str = "rain",
+ transforms: Optional[Callable] = None,
+ ) -> None:
+ self.root = root
+ self.stage = stage
+ self.transforms = transforms
+
+ if isinstance(self.root, str):
+ self.root = Path(self.root)
+
+ if not self.root.exists():
+ raise FileNotFoundError(f"Dataset not found at {self.root}")
+
+ if transforms is None:
+ self.transforms = Compose([])
+ else:
+ self.transforms = transforms
+
+ if self.stage not in ["val"]:
+ raise RuntimeError("Unknown option " + self.stage + " as training stage variable. Valid options: 'train'")
+
+ json_path = self.root / "imw2020-val" / "dataset.json"
+ with open(json_path) as json_file:
+ json_data = json.load(json_file)
+
+ self.scenes = []
+
+ for scene in json_data:
+ self.scenes.append(Scene(self.root / "imw2020-val", json_data[scene]))
+
+ self.tuples_per_scene = [len(scene) for scene in self.scenes]
+
+ def __len__(self) -> int:
+ return sum(self.tuples_per_scene)
+
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
+ sample: Any = {}
+
+ i_scene, i_image = self._get_scene_and_image_id_from_idx(idx)
+
+ sample["src_path"], sample["trg_path"], path_calib_src, path_calib_trg = self.scenes[i_scene][i_image]
+
+ cam_src = Camera.from_calibration_file(path_calib_src)
+ cam_trg = Camera.from_calibration_file(path_calib_trg)
+
+ F = self.get_F(cam_src, cam_trg)
+ s2t_R, s2t_T = self.get_relative_pose(cam_src, cam_trg)
+
+ src_img = read_image(sample["src_path"]) / 255.0
+ trg_img = read_image(sample["trg_path"]) / 255.0
+
+ _, H_src, W_src = src_img.shape
+ _, H_trg, W_trg = trg_img.shape
+
+ src_mask = torch.ones((1, H_src, W_src), dtype=torch.uint8)
+ trg_mask = torch.ones((1, H_trg, W_trg), dtype=torch.uint8)
+
+ H = torch.eye(3)
+ if self.transforms:
+ src_img, trg_img, src_mask, trg_mask, _ = self.transforms(src_img, trg_img, src_mask, trg_mask, H)
+
+        # check the transformations in self.transforms: only Normalize and Resize are allowed
+ for t in self.transforms.transforms:
+ if t.__class__.__name__ not in ["Normalize", "Resize"]:
+ raise ValueError(f"Transform {t.__class__.__name__} not allowed in DISK_IMW dataset")
+
+ sample["src_image"] = src_img
+ sample["trg_image"] = trg_img
+ sample["orig_size_src"] = (H_src, W_src)
+ sample["orig_size_trg"] = (H_trg, W_trg)
+ sample["src_mask"] = src_mask.to(torch.bool)
+ sample["trg_mask"] = trg_mask.to(torch.bool)
+ sample["F"] = F
+ sample["s2t_R"] = s2t_R
+ sample["s2t_T"] = s2t_T
+ sample["src_camera"] = cam_src
+ sample["trg_camera"] = cam_trg
+
+ return sample
+
+ def get_relative_pose(self, cam_src: Camera, cam_trg: Camera) -> Tuple[torch.Tensor, torch.Tensor]:
+ R = cam_trg.R @ cam_src.R.T
+ T = cam_trg.t - R @ cam_src.t
+
+ return R, T
+
+ def get_F(self, cam_src: Camera, cam_trg: Camera) -> torch.Tensor:
+ F = cameras2F(cam_src, cam_trg)
+
+ return F
+
+ def _get_scene_and_image_id_from_idx(self, idx: int) -> Tuple[int, int]:
+ accumulated_tuples = accumulate(self.tuples_per_scene)
+
+ if idx >= sum(self.tuples_per_scene):
+ raise IndexError(f"Index {idx} out of bounds")
+
+ idx_scene = None
+ for i, accumulated_tuple in enumerate(accumulated_tuples):
+ idx_scene = i
+ if idx < accumulated_tuple:
+ break
+
+ idx_image = idx - sum(self.tuples_per_scene[:idx_scene])
+
+ return idx_scene, idx_image
+
+ def _get_other_random_scene_and_image_id(self, scene_id_to_exclude: int) -> Tuple[int, int]:
+ possible_scene_ids = list(range(len(self.scenes)))
+ possible_scene_ids.remove(scene_id_to_exclude)
+
+ idx_scene = random.choice(possible_scene_ids)
+ idx_image = random.randint(0, len(self.scenes[idx_scene]) - 1)
+
+ return idx_scene, idx_image
+
+
+class Scene:
+ def __init__(self, root_path, scene_data: Dict[str, Any]) -> None:
+ self.root_path = root_path
+ self.image_path = Path(scene_data["image_path"])
+ self.calib_path = Path(scene_data["calib_path"])
+ self.image_names = scene_data["images"]
+ self.tuples = scene_data["tuples"]
+
+ def __len__(self) -> int:
+ return len(self.tuples)
+
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
+ idx_1 = self.tuples[idx][0]
+ idx_2 = self.tuples[idx][1]
+
+ path_image_1 = str(self.root_path / self.image_path / self.image_names[idx_1]) + ".jpg"
+ path_image_2 = str(self.root_path / self.image_path / self.image_names[idx_2]) + ".jpg"
+ path_calib_1 = str(self.root_path / self.calib_path / ("calibration_" + self.image_names[idx_1])) + ".h5"
+ path_calib_2 = str(self.root_path / self.calib_path / ("calibration_" + self.image_names[idx_2])) + ".h5"
+
+ return path_image_1, path_image_2, path_calib_1, path_calib_2
diff --git a/imcui/third_party/RIPE/ripe/data/datasets/disk_megadepth.py b/imcui/third_party/RIPE/ripe/data/datasets/disk_megadepth.py
new file mode 100644
index 0000000000000000000000000000000000000000..aee1427cb362b90fee5ed46aab42cd7c0e4625b9
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/data/datasets/disk_megadepth.py
@@ -0,0 +1,157 @@
+import json
+import random
+from itertools import accumulate
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional, Tuple
+
+import torch
+from torch.utils.data import Dataset
+from torchvision.io import read_image
+
+from ripe import utils
+from ripe.data.data_transforms import Compose
+
+log = utils.get_pylogger(__name__)
+
+
+class DISK_Megadepth(Dataset):
+ def __init__(
+ self,
+ root: str,
+ max_scene_size: int,
+ stage: str = "train",
+ # condition: str = "rain",
+ transforms: Optional[Callable] = None,
+ positive_only: bool = False,
+ ) -> None:
+ self.root = root
+ self.stage = stage
+ self.transforms = transforms
+ self.positive_only = positive_only
+
+ if isinstance(self.root, str):
+ self.root = Path(self.root)
+
+ if not self.root.exists():
+ raise FileNotFoundError(f"Dataset not found at {self.root}")
+
+ if transforms is None:
+ self.transforms = Compose([])
+ else:
+ self.transforms = transforms
+
+ if self.stage not in ["train"]:
+ raise RuntimeError("Unknown option " + self.stage + " as training stage variable. Valid options: 'train'")
+
+ json_path = self.root / "megadepth" / "dataset.json"
+ with open(json_path) as json_file:
+ json_data = json.load(json_file)
+
+ self.scenes = []
+
+ for scene in json_data:
+ self.scenes.append(Scene(self.root / "megadepth", json_data[scene], max_scene_size))
+
+ self.tuples_per_scene = [len(scene) for scene in self.scenes]
+
+ if positive_only:
+ log.warning("Using only positive pairs!")
+
+ def __len__(self) -> int:
+ if self.positive_only:
+ return sum(self.tuples_per_scene)
+ return 2 * sum(self.tuples_per_scene)
+
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
+ sample: Any = {}
+
+ positive_sample = idx % 2 == 0 or self.positive_only
+ if not self.positive_only:
+ idx = idx // 2
+
+ sample["label"] = positive_sample
+
+ i_scene, i_image = self._get_scene_and_image_id_from_idx(idx)
+
+ if positive_sample:
+ sample["src_path"], sample["trg_path"] = self.scenes[i_scene][i_image]
+
+ homography = torch.eye(3, dtype=torch.float32)
+ else:
+ sample["src_path"], _ = self.scenes[i_scene][i_image]
+
+ i_scene_other, i_image_other = self._get_other_random_scene_and_image_id(i_scene)
+
+ sample["trg_path"], _ = self.scenes[i_scene_other][i_image_other]
+
+ homography = torch.zeros((3, 3), dtype=torch.float32)
+
+ src_img = read_image(sample["src_path"]) / 255.0
+ trg_img = read_image(sample["trg_path"]) / 255.0
+
+ _, H_src, W_src = src_img.shape
+ _, H_trg, W_trg = trg_img.shape
+
+ src_mask = torch.ones((1, H_src, W_src), dtype=torch.uint8)
+ trg_mask = torch.ones((1, H_trg, W_trg), dtype=torch.uint8)
+
+ if self.transforms:
+ src_img, trg_img, src_mask, trg_mask, _ = self.transforms(src_img, trg_img, src_mask, trg_mask, homography)
+
+ sample["src_image"] = src_img
+ sample["trg_image"] = trg_img
+ sample["src_mask"] = src_mask.to(torch.bool)
+ sample["trg_mask"] = trg_mask.to(torch.bool)
+ sample["homography"] = homography
+
+ return sample
+
+ def _get_scene_and_image_id_from_idx(self, idx: int) -> Tuple[int, int]:
+ accumulated_tuples = accumulate(self.tuples_per_scene)
+
+ if idx >= sum(self.tuples_per_scene):
+ raise IndexError(f"Index {idx} out of bounds")
+
+ idx_scene = None
+ for i, accumulated_tuple in enumerate(accumulated_tuples):
+ idx_scene = i
+ if idx < accumulated_tuple:
+ break
+
+ idx_image = idx - sum(self.tuples_per_scene[:idx_scene])
+
+ return idx_scene, idx_image
+
+ def _get_other_random_scene_and_image_id(self, scene_id_to_exclude: int) -> Tuple[int, int]:
+ possible_scene_ids = list(range(len(self.scenes)))
+ possible_scene_ids.remove(scene_id_to_exclude)
+
+ idx_scene = random.choice(possible_scene_ids)
+ idx_image = random.randint(0, len(self.scenes[idx_scene]) - 1)
+
+ return idx_scene, idx_image
+
+
+class Scene:
+ def __init__(self, root_path, scene_data: Dict[str, Any], max_size_scene) -> None:
+ self.root_path = root_path
+ self.image_path = Path(scene_data["image_path"])
+ self.image_names = scene_data["images"]
+
+        # randomly subsample the tuples if a positive maximum scene size is given, otherwise keep all of them
+        if max_size_scene > 0:
+            self.tuples = random.sample(scene_data["tuples"], min(max_size_scene, len(scene_data["tuples"])))
+        else:
+            self.tuples = scene_data["tuples"]
+
+ def __len__(self) -> int:
+ return len(self.tuples)
+
+ def __getitem__(self, idx: int) -> Tuple[str, str]:
+ idx_1, idx_2 = random.sample([0, 1, 2], 2)
+
+ idx_1 = self.tuples[idx][idx_1]
+ idx_2 = self.tuples[idx][idx_2]
+
+ path_image_1 = str(self.root_path / self.image_path / self.image_names[idx_1])
+ path_image_2 = str(self.root_path / self.image_path / self.image_names[idx_2])
+
+ return path_image_1, path_image_2
diff --git a/imcui/third_party/RIPE/ripe/data/datasets/tokyo247.py b/imcui/third_party/RIPE/ripe/data/datasets/tokyo247.py
new file mode 100644
index 0000000000000000000000000000000000000000..6790a281909f25c8c7911cf2706aba4208181f45
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/data/datasets/tokyo247.py
@@ -0,0 +1,134 @@
+import os
+import random
+from glob import glob
+from typing import Any, Callable, Optional
+
+import torch
+from torch.utils.data import Dataset
+from torchvision.io import read_image
+
+from ripe import utils
+from ripe.data.data_transforms import Compose
+
+log = utils.get_pylogger(__name__)
+
+
+class Tokyo247(Dataset):
+ def __init__(
+ self,
+ root: str,
+ stage: str = "train",
+ transforms: Optional[Callable] = None,
+ positive_only: bool = False,
+ ):
+ if stage != "train":
+ raise ValueError("Tokyo247Dataset only supports the 'train' stage.")
+
+ # check if the root directory exists
+ if not os.path.isdir(root):
+ raise FileNotFoundError(f"Directory {root} does not exist.")
+
+ self.root_dir = root
+ self.transforms = transforms if transforms is not None else Compose([])
+ self.positive_only = positive_only
+
+ self.image_paths = []
+ self.positive_pairs = []
+
+ # Collect images grouped by location folder
+ self.locations = {}
+ for location_rough in sorted(os.listdir(self.root_dir)):
+ location_rough_path = os.path.join(self.root_dir, location_rough)
+
+ # check if the location_rough_path is a directory
+ if not os.path.isdir(location_rough_path):
+ continue
+
+ for location_fine in sorted(os.listdir(location_rough_path)):
+ location_fine_path = os.path.join(self.root_dir, location_rough, location_fine)
+
+ if os.path.isdir(location_fine_path):
+ images = sorted(
+ glob(os.path.join(location_fine_path, "*.png")),
+ key=lambda i: int(i[-7:-4]),
+ )
+ if len(images) >= 12:
+ self.locations[location_fine] = images
+ self.image_paths.extend(images)
+
+ # Generate positive pairs
+ for _, images in self.locations.items():
+ for i in range(len(images) - 1):
+ self.positive_pairs.append((images[i], images[i + 1]))
+ self.positive_pairs.append((images[-1], images[0]))
+
+ if positive_only:
+ log.warning("Using only positive pairs!")
+
+ log.info(f"Found {len(self.positive_pairs)} image pairs.")
+
+ def __len__(self):
+ if self.positive_only:
+ return len(self.positive_pairs)
+ return 2 * len(self.positive_pairs)
+
+ def __getitem__(self, idx):
+ sample: Any = {}
+
+ positive_sample = (idx % 2 == 0) or (self.positive_only)
+ if not self.positive_only:
+ idx = idx // 2
+
+ sample["label"] = positive_sample
+
+ if positive_sample: # Positive pair
+ img1_path, img2_path = self.positive_pairs[idx]
+
+ assert os.path.dirname(img1_path) == os.path.dirname(img2_path), (
+ f"Source and target image mismatch: {img1_path} vs {img2_path}"
+ )
+
+ homography = torch.eye(3, dtype=torch.float32)
+ else: # Negative pair
+ img1_path = random.choice(self.image_paths)
+ img2_path = random.choice(self.image_paths)
+
+ # Ensure images are from different folders
+ esc = 0
+ while os.path.dirname(img1_path) == os.path.dirname(img2_path):
+ img2_path = random.choice(self.image_paths)
+
+ esc += 1
+ if esc > 100:
+ raise RuntimeError("Could not find a negative pair.")
+
+ assert os.path.dirname(img1_path) != os.path.dirname(img2_path), (
+ f"Source and target image match for negative pair: {img1_path} vs {img2_path}"
+ )
+
+ homography = torch.zeros((3, 3), dtype=torch.float32)
+
+ sample["src_path"] = img1_path
+ sample["trg_path"] = img2_path
+
+ # Load images
+ src_img = read_image(sample["src_path"]) / 255.0
+ trg_img = read_image(sample["trg_path"]) / 255.0
+
+ _, H_src, W_src = src_img.shape
+        _, H_trg, W_trg = trg_img.shape
+
+ src_mask = torch.ones((1, H_src, W_src), dtype=torch.uint8)
+ trg_mask = torch.ones((1, H_trg, W_trg), dtype=torch.uint8)
+
+ # Apply transformations
+ if self.transforms:
+ src_img, trg_img, src_mask, trg_mask, _ = self.transforms(src_img, trg_img, src_mask, trg_mask, homography)
+
+ sample["src_image"] = src_img
+ sample["trg_image"] = trg_img
+ sample["src_mask"] = src_mask.to(torch.bool)
+ sample["trg_mask"] = trg_mask.to(torch.bool)
+ sample["homography"] = homography
+
+ return sample
diff --git a/imcui/third_party/RIPE/ripe/losses/__init__.py b/imcui/third_party/RIPE/ripe/losses/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/imcui/third_party/RIPE/ripe/losses/contrastive_loss.py b/imcui/third_party/RIPE/ripe/losses/contrastive_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..fce681bb1bdafb25f6e64874648f5ca02df391c0
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/losses/contrastive_loss.py
@@ -0,0 +1,88 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def second_nearest_neighbor(desc1, desc2):
+ if desc2.shape[0] < 2: # We cannot perform snn check, so output empty matches
+ raise ValueError("desc2 should have at least 2 descriptors")
+
+ dist = torch.cdist(desc1, desc2, p=2)
+
+ vals, idxs = torch.topk(dist, 2, dim=1, largest=False)
+ idxs_in_2 = idxs[:, 1]
+ idxs_in_1 = torch.arange(0, idxs_in_2.size(0), device=dist.device)
+
+ matches_idxs = torch.cat([idxs_in_1.view(-1, 1), idxs_in_2.view(-1, 1)], 1)
+
+ return vals[:, 1].view(-1, 1), matches_idxs
+
+
+def contrastive_loss(
+ desc1,
+ desc2,
+ matches,
+ inliers,
+ label,
+ logits_1,
+ logits_2,
+ pos_margin=1.0,
+ neg_margin=1.0,
+):
+ if inliers.sum() < 8: # if there are too few inliers, calculate loss on all matches
+ inliers = torch.ones_like(inliers)
+
+ matched_inliers_descs1 = desc1[matches[:, 0][inliers]]
+ matched_inliers_descs2 = desc2[matches[:, 1][inliers]]
+
+ if logits_1 is not None and logits_2 is not None:
+ matched_inliers_logits1 = logits_1[matches[:, 0][inliers]]
+ matched_inliers_logits2 = logits_2[matches[:, 1][inliers]]
+ logits = torch.minimum(matched_inliers_logits1, matched_inliers_logits2)
+ else:
+ logits = torch.ones_like(matches[:, 0][inliers])
+
+ if label:
+ snn_match_dists_1, idx1 = second_nearest_neighbor(matched_inliers_descs1, desc2)
+ snn_match_dists_2, idx2 = second_nearest_neighbor(matched_inliers_descs2, desc1)
+
+ dists = torch.hstack((snn_match_dists_1, snn_match_dists_2))
+ min_dists_idx = torch.min(dists, dim=1).indices.unsqueeze(1)
+
+ dists_hard = torch.gather(dists, 1, min_dists_idx).squeeze(-1)
+ dists_pos = F.pairwise_distance(matched_inliers_descs1, matched_inliers_descs2)
+
+ contrastive_loss = torch.clamp(pos_margin + dists_pos - dists_hard, min=0.0)
+
+ contrastive_loss = contrastive_loss * logits
+
+ contrastive_loss = contrastive_loss.sum() / (logits.sum() + 1e-8) # small epsilon to avoid division by zero
+ else:
+ dists = F.pairwise_distance(matched_inliers_descs1, matched_inliers_descs2)
+ contrastive_loss = torch.clamp(neg_margin - dists, min=0.0)
+
+ contrastive_loss = contrastive_loss * logits
+
+ contrastive_loss = contrastive_loss.sum() / (logits.sum() + 1e-8) # small epsilon to avoid division by zero
+
+ return contrastive_loss
+
+
+class ContrastiveLoss(nn.Module):
+ def __init__(self, pos_margin=1.0, neg_margin=1.0):
+ super().__init__()
+ self.pos_margin = pos_margin
+ self.neg_margin = neg_margin
+
+ def forward(self, desc1, desc2, matches, inliers, label, logits_1=None, logits_2=None):
+ return contrastive_loss(
+ desc1,
+ desc2,
+ matches,
+ inliers,
+ label,
+ logits_1,
+ logits_2,
+ self.pos_margin,
+ self.neg_margin,
+ )
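+
+
+# Summary of the loss above (descriptive comment, no new behaviour): for positive image pairs the
+# loss is a hard-negative margin hinge, clamp(pos_margin + d(anchor, positive) - d_second_nearest, 0);
+# for negative pairs matched descriptors are pushed apart via clamp(neg_margin - d, 0). Both terms
+# are weighted by the minimum of the two keypoint logits and normalised by the sum of those weights.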
diff --git a/imcui/third_party/RIPE/ripe/matcher/__init__.py b/imcui/third_party/RIPE/ripe/matcher/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/imcui/third_party/RIPE/ripe/matcher/concurrent_matcher.py b/imcui/third_party/RIPE/ripe/matcher/concurrent_matcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..19d3cb8c55861050a787514896707d945b0e6c3e
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/matcher/concurrent_matcher.py
@@ -0,0 +1,97 @@
+import concurrent.futures
+
+import torch
+
+
+class ConcurrentMatcher:
+ """A class that performs matching and geometric filtering in parallel using a thread pool executor.
+ It matches keypoints from two sets of descriptors and applies a robust estimator to filter the matches based on geometric constraints.
+
+ Args:
+ matcher (callable): A callable that takes two sets of descriptors and returns distances and indices of matches.
+ robust_estimator (callable): A callable that estimates a geometric transformation and returns inliers.
+ min_num_matches (int, optional): Minimum number of matches required to perform geometric filtering. Defaults to 8.
+ max_workers (int, optional): Maximum number of threads in the thread pool executor. Defaults to 12.
+ """
+
+ def __init__(self, matcher, robust_estimator, min_num_matches=8, max_workers=12):
+ self.matcher = matcher
+ self.robust_estimator = robust_estimator
+ self.min_num_matches = min_num_matches
+
+ self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)
+
+ @torch.no_grad()
+ def __call__(
+ self,
+ kpts1,
+ kpts2,
+ pdesc1,
+ pdesc2,
+ selected_mask1,
+ selected_mask2,
+ inl_th,
+ label=None,
+ ):
+ dev = pdesc1.device
+ B = pdesc1.shape[0]
+
+ batch_rel_idx_matches = [None] * B
+ batch_idx_matches = [None] * B
+ future_results = [None] * B
+
+ for b in range(B):
+ if selected_mask1[b].sum() < 16 or selected_mask2[b].sum() < 16:
+ continue
+
+ dists, idx_matches = self.matcher(pdesc1[b][selected_mask1[b]], pdesc2[b][selected_mask2[b]])
+
+ batch_rel_idx_matches[b] = idx_matches.clone()
+
+ # calculate ABSOLUTE indexes
+ idx_matches[:, 0] = torch.nonzero(selected_mask1[b], as_tuple=False)[idx_matches[:, 0]].squeeze()
+ idx_matches[:, 1] = torch.nonzero(selected_mask2[b], as_tuple=False)[idx_matches[:, 1]].squeeze()
+
+ batch_idx_matches[b] = idx_matches
+
+ # if not enough matches
+ if idx_matches.shape[0] < self.min_num_matches:
+ ransac_inliers = torch.zeros((idx_matches.shape[0]), device=dev).bool()
+ future_results[b] = (None, ransac_inliers)
+ continue
+
+            # use label information to exclude negative pairs from the geometric filtering process -> enforces more discriminative descriptors
+ if label is not None and label[b] == 0:
+ ransac_inliers = torch.ones((idx_matches.shape[0]), device=dev).bool()
+ future_results[b] = (None, ransac_inliers)
+ continue
+
+ mkpts1 = kpts1[b][idx_matches[:, 0]]
+ mkpts2 = kpts2[b][idx_matches[:, 1]]
+
+ future_results[b] = self.executor.submit(self.robust_estimator, mkpts1, mkpts2, inl_th)
+
+ batch_ransac_inliers = [None] * B
+ batch_Fm = [None] * B
+
+ for b in range(B):
+ future_result = future_results[b]
+ if future_result is None:
+ ransac_inliers = None
+ Fm = None
+ elif isinstance(future_result, tuple):
+ Fm, ransac_inliers = future_result
+ else:
+ Fm, ransac_inliers = future_result.result()
+
+ # if no inliers
+ if ransac_inliers.sum() == 0:
+ ransac_inliers = ransac_inliers.squeeze(
+ -1
+ ) # kornia.geometry.ransac.RANSAC returns (N, 1) tensor if no inliers and (N,) tensor if inliers
+ Fm = None
+
+ batch_ransac_inliers[b] = ransac_inliers
+ batch_Fm[b] = Fm
+
+ return batch_rel_idx_matches, batch_idx_matches, batch_ransac_inliers, batch_Fm
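+
+
+# Usage sketch (illustrative; the matcher and estimator choices mirror the rest of the code base):
+#
+#   import kornia.feature as KF
+#   from ripe.matcher.pose_estimator_poselib import PoseLibRelativePoseEstimator
+#
+#   matcher = ConcurrentMatcher(KF.DescriptorMatcher("mnn"), PoseLibRelativePoseEstimator())
+#   rel_idx, abs_idx, inliers, Fm = matcher(kpts1, kpts2, desc1, desc2, mask1, mask2, inl_th=0.5)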
diff --git a/imcui/third_party/RIPE/ripe/matcher/pose_estimator_poselib.py b/imcui/third_party/RIPE/ripe/matcher/pose_estimator_poselib.py
new file mode 100644
index 0000000000000000000000000000000000000000..bff13ba8e671c9bd136ae97b5702d2ed20a49ce4
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/matcher/pose_estimator_poselib.py
@@ -0,0 +1,31 @@
+import poselib
+import torch
+
+
+class PoseLibRelativePoseEstimator:
+ """PoseLibRelativePoseEstimator estimates the fundamental matrix using poselib library.
+ It uses the poselib's estimate_fundamental function to compute the fundamental matrix and inliers based on the provided points.
+ Args:
+ None
+ """
+
+ def __init__(self):
+ pass
+
+ def __call__(self, pts0, pts1, inl_th):
+ F, info = poselib.estimate_fundamental(
+ pts0.cpu().numpy(),
+ pts1.cpu().numpy(),
+ {
+ "max_epipolar_error": inl_th,
+ },
+ )
+
+ success = F is not None
+ if success:
+ inliers = info.pop("inliers")
+ inliers = torch.tensor(inliers, dtype=torch.bool, device=pts0.device)
+ else:
+ inliers = torch.zeros(pts0.shape[0], dtype=torch.bool, device=pts0.device)
+
+ return F, inliers
diff --git a/imcui/third_party/RIPE/ripe/model_zoo/__init__.py b/imcui/third_party/RIPE/ripe/model_zoo/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdba1cbb458af322fd598d9920335c1bd04ca4b6
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/model_zoo/__init__.py
@@ -0,0 +1 @@
+from .vgg_hyper import vgg_hyper # noqa: F401
diff --git a/imcui/third_party/RIPE/ripe/model_zoo/vgg_hyper.py b/imcui/third_party/RIPE/ripe/model_zoo/vgg_hyper.py
new file mode 100644
index 0000000000000000000000000000000000000000..35473f98f8769c80da062f636575da7946065d6f
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/model_zoo/vgg_hyper.py
@@ -0,0 +1,39 @@
+from pathlib import Path
+
+import torch
+
+from ripe.models.backbones.vgg import VGG
+from ripe.models.ripe import RIPE
+from ripe.models.upsampler.hypercolumn_features import HyperColumnFeatures
+
+
+def vgg_hyper(model_path: Path = None, desc_shares=None):
+ if model_path is None:
+ # check if the weights file exists in the current directory
+ model_path = Path("/tmp/ripe_weights.pth")
+
+ if model_path.exists():
+ print(f"Using existing weights from {model_path}")
+ else:
+ print("Weights file not found. Downloading ...")
+ torch.hub.download_url_to_file(
+ "https://cvg.hhi.fraunhofer.de/RIPE/ripe_weights.pth",
+ "/tmp/ripe_weights.pth",
+ )
+ else:
+ if not model_path.exists():
+ print(f"Error: {model_path} does not exist.")
+ raise FileNotFoundError(f"Error: {model_path} does not exist.")
+
+ backbone = VGG(pretrained=False)
+ upsampler = HyperColumnFeatures()
+
+ extractor = RIPE(
+ net=backbone,
+ upsampler=upsampler,
+ desc_shares=desc_shares,
+ )
+
+ extractor.load_state_dict(torch.load(model_path, map_location="cpu"))
+
+ return extractor
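+
+
+# Usage sketch (illustrative): calling vgg_hyper() without arguments downloads the released weights
+# to /tmp/ripe_weights.pth, while passing a Path loads local weights instead.
+#
+#   model = vgg_hyper()  # or: vgg_hyper(Path("weights_ripe.pth"))
+#   model = model.eval().to("cuda")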
diff --git a/imcui/third_party/RIPE/ripe/models/__init__.py b/imcui/third_party/RIPE/ripe/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/imcui/third_party/RIPE/ripe/models/backbones/__init__.py b/imcui/third_party/RIPE/ripe/models/backbones/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/imcui/third_party/RIPE/ripe/models/backbones/backbone_base.py b/imcui/third_party/RIPE/ripe/models/backbones/backbone_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..33e448aef7e40609c6f682c24d425d23ad5823d3
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/models/backbones/backbone_base.py
@@ -0,0 +1,61 @@
+import torch
+import torch.nn as nn
+
+
+class BackboneBase(nn.Module):
+ """Base class for backbone networks. Provides a standard interface for preprocessing inputs and
+ defining encoder dimensions.
+
+ Args:
+ nchannels (int): Number of input channels.
+ use_instance_norm (bool): Whether to apply instance normalization.
+ """
+
+ def __init__(self, nchannels=3, use_instance_norm=False):
+ super().__init__()
+ assert nchannels > 0, "Number of channels must be positive."
+ self.nchannels = nchannels
+ self.use_instance_norm = use_instance_norm
+ self.norm = nn.InstanceNorm2d(nchannels) if use_instance_norm else None
+
+ def get_dim_layers_encoder(self):
+ """Get dimensions of encoder layers."""
+ raise NotImplementedError("Subclasses must implement this method.")
+
+ def _forward(self, x):
+ """Define the forward pass for the backbone."""
+ raise NotImplementedError("Subclasses must implement this method.")
+
+ def forward(self, x: torch.Tensor, preprocess=True):
+ """Forward pass with optional preprocessing.
+
+ Args:
+ x (Tensor): Input tensor.
+ preprocess (bool): Whether to apply channel reduction.
+ """
+ if preprocess:
+ if x.dim() != 4:
+ if x.dim() == 2 and x.shape[0] > 3 and x.shape[1] > 3:
+ x = x.unsqueeze(0).unsqueeze(0)
+ elif x.dim() == 3:
+ x = x.unsqueeze(0)
+ else:
+ raise ValueError(f"Unexpected input shape: {x.shape}")
+
+ if self.nchannels == 1 and x.shape[1] != 1:
+ if len(x.shape) == 4: # Assumes (batch, channel, height, width)
+ x = torch.mean(x, axis=1, keepdim=True)
+ else:
+ raise ValueError(f"Unexpected input shape: {x.shape}")
+
+            # replicate a single-channel (grayscale) input to 3 channels for RGB backbones
+ if self.nchannels == 3 and x.shape[1] == 1:
+ if len(x.shape) == 4:
+ x = x.repeat(1, 3, 1, 1)
+ else:
+ raise ValueError(f"Unexpected input shape: {x.shape}")
+
+ if self.use_instance_norm:
+ x = self.norm(x)
+
+ return self._forward(x)
diff --git a/imcui/third_party/RIPE/ripe/models/backbones/vgg.py b/imcui/third_party/RIPE/ripe/models/backbones/vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..87d67414dd719e256a1bc0f2d5ad88e1f4286dde
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/models/backbones/vgg.py
@@ -0,0 +1,99 @@
+# adapted from: https://github.com/Parskatt/DeDoDe/blob/main/DeDoDe/encoder.py and https://github.com/Parskatt/DeDoDe/blob/main/DeDoDe/decoder.py
+
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .backbone_base import BackboneBase
+from .vgg_utils import VGG19, ConvRefiner, Decoder
+
+
+class VGG(BackboneBase):
+ def __init__(self, nchannels=3, pretrained=True, use_instance_norm=True, mode="dect"):
+ super().__init__(nchannels=nchannels, use_instance_norm=use_instance_norm)
+
+ self.nchannels = nchannels
+ self.mode = mode
+
+ if self.mode not in ["dect", "desc", "dect+desc"]:
+ raise ValueError("mode should be 'dect', 'desc' or 'dect+desc'")
+
+ NUM_OUTPUT_CHANNELS, hidden_blocks = self._get_mode_params(mode)
+ conv_refiner = self._create_conv_refiner(NUM_OUTPUT_CHANNELS, hidden_blocks)
+
+ self.encoder = VGG19(pretrained=pretrained, num_input_channels=nchannels)
+ self.decoder = Decoder(conv_refiner, num_prototypes=NUM_OUTPUT_CHANNELS)
+
+ def _get_mode_params(self, mode):
+ """Get the number of output channels and the number of hidden blocks for the ConvRefiner.
+
+ Depending on the mode, the ConvRefiner will have a different number of output channels.
+ """
+
+ if mode == "dect":
+ return 1, 8
+ elif mode == "desc":
+ return 256, 5
+ elif mode == "dect+desc":
+ return 256 + 1, 8
+
+ def _create_conv_refiner(self, num_output_channels, hidden_blocks):
+ return nn.ModuleDict(
+ {
+ "8": ConvRefiner(
+ 512,
+ 512,
+ 256 + num_output_channels,
+ hidden_blocks=hidden_blocks,
+ residual=True,
+ ),
+ "4": ConvRefiner(
+ 256 + 256,
+ 256,
+ 128 + num_output_channels,
+ hidden_blocks=hidden_blocks,
+ residual=True,
+ ),
+ "2": ConvRefiner(
+ 128 + 128,
+ 128,
+ 64 + num_output_channels,
+ hidden_blocks=hidden_blocks,
+ residual=True,
+ ),
+ "1": ConvRefiner(
+ 64 + 64,
+ 64,
+ 1 + num_output_channels,
+ hidden_blocks=hidden_blocks,
+ residual=True,
+ ),
+ }
+ )
+
+ def get_dim_layers_encoder(self):
+ return self.encoder.get_dim_layers()
+
+ def _forward(self, x):
+ features, sizes = self.encoder(x)
+ output = 0
+ context = None
+ scales = self.decoder.scales
+ for idx, (feature_map, scale) in enumerate(zip(reversed(features), scales)):
+ delta_descriptor, context = self.decoder(feature_map, scale=scale, context=context)
+ output = output + delta_descriptor
+ if idx < len(scales) - 1:
+ size = sizes[-(idx + 2)]
+ output = F.interpolate(output, size=size, mode="bilinear", align_corners=False)
+ context = F.interpolate(context, size=size, mode="bilinear", align_corners=False)
+
+ if self.mode == "dect":
+ return {"heatmap": output, "coarse_descs": features}
+ elif self.mode == "desc":
+ return {"fine_descs": output, "coarse_descs": features}
+ elif self.mode == "dect+desc":
+ logits = output[:, :1].contiguous()
+ descs = output[:, 1:].contiguous()
+
+ return {"heatmap": logits, "fine_descs": descs, "coarse_descs": features}
+ else:
+ raise ValueError("mode should be 'dect', 'desc' or 'dect+desc'")
diff --git a/imcui/third_party/RIPE/ripe/models/backbones/vgg_utils.py b/imcui/third_party/RIPE/ripe/models/backbones/vgg_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8a8f3ac6a0a8f29e835bae0b997b1358a06ff7f
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/models/backbones/vgg_utils.py
@@ -0,0 +1,143 @@
+# adapted from: https://github.com/Parskatt/DeDoDe/blob/main/DeDoDe/encoder.py and https://github.com/Parskatt/DeDoDe/blob/main/DeDoDe/decoder.py
+
+import torch
+import torch.nn as nn
+import torchvision.models as tvm
+
+from ripe import utils
+
+log = utils.get_pylogger(__name__)
+
+
+class Decoder(nn.Module):
+ def __init__(self, layers, *args, super_resolution=False, num_prototypes=1, **kwargs) -> None:
+ super().__init__(*args, **kwargs)
+ self.layers = layers
+ self.scales = self.layers.keys()
+ self.super_resolution = super_resolution
+ self.num_prototypes = num_prototypes
+
+ def forward(self, features, context=None, scale=None):
+ if context is not None:
+ features = torch.cat((features, context), dim=1)
+ stuff = self.layers[scale](features)
+ logits, context = (
+ stuff[:, : self.num_prototypes],
+ stuff[:, self.num_prototypes :],
+ )
+ return logits, context
+
+
+class ConvRefiner(nn.Module):
+ def __init__(
+ self,
+ in_dim=6,
+ hidden_dim=16,
+ out_dim=2,
+ dw=True,
+ kernel_size=5,
+ hidden_blocks=5,
+ residual=False,
+ ):
+ super().__init__()
+ self.block1 = self.create_block(
+ in_dim,
+ hidden_dim,
+ dw=False,
+ kernel_size=1,
+ )
+ self.hidden_blocks = nn.Sequential(
+ *[
+ self.create_block(
+ hidden_dim,
+ hidden_dim,
+ dw=dw,
+ kernel_size=kernel_size,
+ )
+ for hb in range(hidden_blocks)
+ ]
+ )
+ self.out_conv = nn.Conv2d(hidden_dim, out_dim, 1, 1, 0)
+ self.residual = residual
+
+ def create_block(
+ self,
+ in_dim,
+ out_dim,
+ dw=True,
+ kernel_size=5,
+ bias=True,
+ norm_type=nn.BatchNorm2d,
+ ):
+ num_groups = 1 if not dw else in_dim
+ if dw:
+ assert out_dim % in_dim == 0, "outdim must be divisible by indim for depthwise"
+ conv1 = nn.Conv2d(
+ in_dim,
+ out_dim,
+ kernel_size=kernel_size,
+ stride=1,
+ padding=kernel_size // 2,
+ groups=num_groups,
+ bias=bias,
+ )
+ norm = norm_type(out_dim) if norm_type is nn.BatchNorm2d else norm_type(num_channels=out_dim)
+ relu = nn.ReLU(inplace=True)
+ conv2 = nn.Conv2d(out_dim, out_dim, 1, 1, 0)
+ return nn.Sequential(conv1, norm, relu, conv2)
+
+ def forward(self, feats):
+ b, c, hs, ws = feats.shape
+ x0 = self.block1(feats)
+ x = self.hidden_blocks(x0)
+ if self.residual:
+ x = (x + x0) / 1.4
+ x = self.out_conv(x)
+ return x
+
+
+class VGG19(nn.Module):
+ def __init__(self, pretrained=False, num_input_channels=3) -> None:
+ super().__init__()
+ self.layers = nn.ModuleList(tvm.vgg19_bn(pretrained=pretrained).features[:40])
+ # Maxpool layers: 6, 13, 26, 39
+
+ if num_input_channels != 3:
+ log.info(f"Changing input channels from 3 to {num_input_channels}")
+ self.layers[0] = nn.Conv2d(num_input_channels, 64, 3, 1, 1)
+
+ def get_dim_layers(self):
+ return [64, 128, 256, 512]
+
+ def forward(self, x, **kwargs):
+ feats = []
+ sizes = []
+ for layer in self.layers:
+ if isinstance(layer, nn.MaxPool2d):
+ feats.append(x)
+ sizes.append(x.shape[-2:])
+ x = layer(x)
+ return feats, sizes
+
+
+class VGG(nn.Module):
+ def __init__(self, size="19", pretrained=False) -> None:
+ super().__init__()
+ if size == "11":
+ self.layers = nn.ModuleList(tvm.vgg11_bn(pretrained=pretrained).features[:22])
+ elif size == "13":
+ self.layers = nn.ModuleList(tvm.vgg13_bn(pretrained=pretrained).features[:28])
+ elif size == "19":
+ self.layers = nn.ModuleList(tvm.vgg19_bn(pretrained=pretrained).features[:40])
+ # Maxpool layers: 6, 13, 26, 39
+
+ def forward(self, x, **kwargs):
+ feats = []
+ sizes = []
+ for layer in self.layers:
+ if isinstance(layer, nn.MaxPool2d):
+ feats.append(x)
+ sizes.append(x.shape[-2:])
+ x = layer(x)
+ return feats, sizes
diff --git a/imcui/third_party/RIPE/ripe/models/ripe.py b/imcui/third_party/RIPE/ripe/models/ripe.py
new file mode 100644
index 0000000000000000000000000000000000000000..8207508be514976090e87c46a90c0c913cf17b90
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/models/ripe.py
@@ -0,0 +1,303 @@
+from typing import List, Optional
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ripe import utils
+from ripe.utils.utils import gridify
+
+log = utils.get_pylogger(__name__)
+
+
+class KeypointSampler(nn.Module):
+ """
+ Sample keypoints according to a Heatmap
+ Adapted from: https://github.com/verlab/DALF_CVPR_2023/blob/main/modules/models/DALF.py
+ """
+
+ def __init__(self, window_size=8):
+ super().__init__()
+ self.window_size = window_size
+ self.idx_cells = None # Cache for meshgrid indices
+
+ def sample(self, grid):
+ """
+ Sample keypoints given a grid where each cell has logits stacked in last dimension
+ Input
+ grid: [B, C, H//w, W//w, w*w]
+
+ Returns
+        log_probs: [B, H//w, W//w] log-probabilities of the selected samples
+        choices: [B, C, H//w, W//w] indices of the chosen positions within each cell
+        accept_mask: [B, H//w, W//w] mask of accepted keypoints
+        logits_selected: [B, H//w, W//w] logits of the selected samples
+
+ """
+ chooser = torch.distributions.Categorical(logits=grid)
+ choices = chooser.sample()
+ logits_selected = torch.gather(grid, -1, choices.unsqueeze(-1)).squeeze(-1)
+
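+        # A Bernoulli "flipper" then decides, per cell, whether the sampled position is
+        # actually kept, so the policy can also learn to reject entire cells instead of
+        # always emitting one keypoint per window.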
+ flipper = torch.distributions.Bernoulli(logits=logits_selected)
+ accepted_choices = flipper.sample()
+
+        # Summing log-probabilities is equivalent to multiplying the probabilities
+ log_probs = chooser.log_prob(choices) + flipper.log_prob(accepted_choices)
+
+ accept_mask = accepted_choices.gt(0)
+
+ return (
+ log_probs.squeeze(1),
+ choices,
+ accept_mask.squeeze(1),
+ logits_selected.squeeze(1),
+ )
+
+ def precompute_idx_cells(self, H, W, device):
+ idx_cells = gridify(
+ torch.dstack(
+ torch.meshgrid(
+ torch.arange(H, dtype=torch.float32, device=device),
+ torch.arange(W, dtype=torch.float32, device=device),
+ )
+ )
+ .permute(2, 0, 1)
+ .unsqueeze(0)
+ .expand(1, -1, -1, -1),
+ window_size=self.window_size,
+ )
+
+ return idx_cells
+
+ def forward(self, x, mask_padding=None):
+ """
+ Sample keypoints from a heatmap
+ Input
+ x: [B, C, H, W] Heatmap
+ mask_padding: [B, 1, H, W] Mask for padding (optional)
+ Returns
+ keypoints: [B, H//w, W//w, 2] Keypoints in (x, y) format
+ log_probs: [B, H//w, W//w] Log probabilities of selected keypoints
+ mask: [B, H//w, W//w] Mask of accepted keypoints
+ mask_padding: [B, 1, H//w, W//w] Mask of padding (optional)
+ logits_selected: [B, H//w, W//w] Logits of selected keypoints
+ """
+
+ B, C, H, W = x.shape
+
+ keypoint_cells = gridify(x, self.window_size)
+
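+        # min-pooling the padding mask over each window marks a grid cell as valid only
+        # if every pixel inside its window is valid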
+ mask_padding = (
+ (torch.min(gridify(mask_padding, self.window_size), dim=4).values) if mask_padding is not None else None
+ )
+
+ if self.idx_cells is None or self.idx_cells.shape[2:4] != (
+ H // self.window_size,
+ W // self.window_size,
+ ):
+ self.idx_cells = self.precompute_idx_cells(H, W, x.device)
+
+ log_probs, idx, mask, logits_selected = self.sample(keypoint_cells)
+
+ keypoints = (
+ torch.gather(
+ self.idx_cells.expand(B, -1, -1, -1, -1),
+ -1,
+ idx.repeat(1, 2, 1, 1).unsqueeze(-1),
+ )
+ .squeeze(-1)
+ .permute(0, 2, 3, 1)
+ )
+
+ # flip keypoints to (x, y) format
+ return keypoints.flip(-1), log_probs, mask, mask_padding, logits_selected
+
+
+class RIPE(nn.Module):
+ """
+ Base class for extracting keypoints and descriptors
+ Input
+ x: [B, C, H, W] Images
+
+ Returns
+ kpts:
+ list of size [B] with detected keypoints
+ descs:
+ list of size [B] with descriptors
+ """
+
+ def __init__(
+ self,
+ net,
+ upsampler,
+ window_size: int = 8,
+ non_linearity_dect=None,
+ desc_shares: Optional[List[int]] = None,
+ descriptor_dim: int = 256,
+ device=None,
+ ):
+ super().__init__()
+ self.net = net
+
+ self.detector = KeypointSampler(window_size)
+ self.upsampler = upsampler
+ self.sampler = None
+ self.window_size = window_size
+ self.non_linearity_dect = non_linearity_dect if non_linearity_dect is not None else nn.Identity()
+
+ log.info(f"Training with window size {window_size}.")
+ log.info(f"Use {non_linearity_dect} as final non-linearity before the detection heatmap.")
+
+ dim_coarse_desc = self.get_dim_raw_desc()
+
+ if desc_shares is not None:
+ assert upsampler.name == "HyperColumnFeatures", (
+ "Individual descriptor convolutions are only supported with HyperColumnFeatures"
+ )
+ assert len(desc_shares) == 4, "desc_shares should have 4 elements"
+ assert sum(desc_shares) == descriptor_dim, f"sum of desc_shares should be {descriptor_dim}"
+
+ self.conv_dim_reduction_coarse_desc = nn.ModuleList()
+
+ for dim_in, dim_out in zip(dim_coarse_desc, desc_shares):
+ log.info(f"Training dim reduction descriptor with {dim_in} -> {dim_out} 1x1 conv")
+ self.conv_dim_reduction_coarse_desc.append(
+ nn.Conv1d(dim_in, dim_out, kernel_size=1, stride=1, padding=0)
+ )
+ else:
+ if descriptor_dim is not None:
+ log.info(f"Training dim reduction descriptor with {sum(dim_coarse_desc)} -> {descriptor_dim} 1x1 conv")
+ self.conv_dim_reduction_coarse_desc = nn.Conv1d(
+ sum(dim_coarse_desc),
+ descriptor_dim,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+ else:
+ log.warning(
+ f"No descriptor dimension specified, no 1x1 conv will be applied! Direct usage of {sum(dim_coarse_desc)}-dimensional raw descriptor"
+ )
+ self.conv_dim_reduction_coarse_desc = nn.Identity()
+
+ def get_dim_raw_desc(self):
+ layers_dims_encoder = self.net.get_dim_layers_encoder()
+
+ if self.upsampler.name == "InterpolateSparse2d":
+ return [layers_dims_encoder[-1]]
+ elif self.upsampler.name == "HyperColumnFeatures":
+ return layers_dims_encoder
+ else:
+ raise ValueError(f"Unknown interpolator {self.upsampler.name}")
+
+ @torch.inference_mode()
+ def detectAndCompute(self, img, threshold=0.5, top_k=2048, output_aux=False):
+ self.train(False)
+
+ if img.dim() == 3:
+ img = img.unsqueeze(0)
+
+ out = self(img, training=False)
+ B, K, H, W = out["heatmap"].shape
+
+ assert B == 1, "Batch size should be 1"
+
+ kpts = [{"xy": self.NMS(out["heatmap"][b], threshold)} for b in range(B)]
+
+ if top_k is not None:
+ for b in range(B):
+ scores = out["heatmap"][b].squeeze(0)[kpts[b]["xy"][:, 1].long(), kpts[b]["xy"][:, 0].long()]
+ sorted_idx = torch.argsort(-scores)
+ kpts[b]["xy"] = kpts[b]["xy"][sorted_idx[:top_k]]
+ if "logprobs" in kpts[b]:
+ kpts[b]["logprobs"] = kpts[b]["xy"][sorted_idx[:top_k]]
+
+ if kpts[0]["xy"].shape[0] == 0:
+ raise RuntimeError("No keypoints detected")
+
+ # the following works for batch size 1 only
+
+ descs = self.get_descs(out["coarse_descs"], img, kpts[0]["xy"].unsqueeze(0), H, W)
+ descs = descs.squeeze(0)
+
+ score_map = out["heatmap"][0].squeeze(0)
+
+ kpts = kpts[0]["xy"]
+
+ scores = score_map[kpts[:, 1], kpts[:, 0]]
+ scores /= score_map.max()
+
+ sort_idx = torch.argsort(-scores)
+ kpts, descs, scores = kpts[sort_idx], descs[sort_idx], scores[sort_idx]
+
+ if output_aux:
+ return (
+ kpts.float(),
+ descs,
+ scores,
+ {
+ "heatmap": out["heatmap"],
+ "descs": out["coarse_descs"],
+ "conv": self.conv_dim_reduction_coarse_desc,
+ },
+ )
+
+ return kpts.float(), descs, scores
+
+ def NMS(self, x, threshold=3.0, kernel_size=3):
+ pad = kernel_size // 2
+ local_max = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=pad)(x)
+
+ pos = (x == local_max) & (x > threshold)
+ return pos.nonzero()[..., 1:].flip(-1)
+
+ def get_descs(self, feature_map, guidance, kpts, H, W):
+ descs = self.upsampler(feature_map, kpts, H, W)
+
+ if isinstance(self.conv_dim_reduction_coarse_desc, nn.ModuleList):
+ # individual descriptor convolutions for each layer
+ desc_conv = []
+ for desc, conv in zip(descs, self.conv_dim_reduction_coarse_desc):
+ desc_conv.append(conv(desc.permute(0, 2, 1)).permute(0, 2, 1))
+ desc = torch.cat(desc_conv, dim=-1)
+ else:
+ desc = torch.cat(descs, dim=-1)
+ desc = self.conv_dim_reduction_coarse_desc(desc.permute(0, 2, 1)).permute(0, 2, 1)
+
+ desc = F.normalize(desc, dim=2)
+
+ return desc
+
+ def forward(self, x, mask_padding=None, training=False):
+ B, C, H, W = x.shape
+ out = self.net(x)
+ out["heatmap"] = self.non_linearity_dect(out["heatmap"])
+ # print(out['map'].shape, out['descr'].shape)
+ if training:
+ kpts, log_probs, mask, mask_padding, logits_selected = self.detector(out["heatmap"], mask_padding)
+
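+            # keep only keypoints that are at least 16 px away from the image border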
+ filter_A = kpts[:, :, :, 0] >= 16
+ filter_B = kpts[:, :, :, 1] >= 16
+ filter_C = kpts[:, :, :, 0] < W - 16
+ filter_D = kpts[:, :, :, 1] < H - 16
+ filter_all = filter_A * filter_B * filter_C * filter_D
+
+ mask = mask * filter_all
+
+ return (
+ kpts.view(B, -1, 2),
+ log_probs.view(B, -1),
+ mask.view(B, -1),
+ mask_padding.view(B, -1),
+ logits_selected.view(B, -1),
+ out,
+ )
+ else:
+ return out
+
+
+def output_number_trainable_params(model):
+ model_parameters = filter(lambda p: p.requires_grad, model.parameters())
+ nb_params = sum([np.prod(p.size()) for p in model_parameters])
+
+ print(f"Number of trainable parameters: {nb_params:d}")
diff --git a/imcui/third_party/RIPE/ripe/models/upsampler/hypercolumn_features.py b/imcui/third_party/RIPE/ripe/models/upsampler/hypercolumn_features.py
new file mode 100644
index 0000000000000000000000000000000000000000..13ad932785d8267c72500a5b86412c86849db4ad
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/models/upsampler/hypercolumn_features.py
@@ -0,0 +1,54 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+class HyperColumnFeatures(nn.Module):
+ """
+    Extract hypercolumn features: sample every feature map of a multi-scale list at N sparse 2D positions
+    Input
+      x: list([B, C_i, H_i, W_i]) feature tensors at different scales (e.g. from a U-Net encoder)
+      pos: [B, N, 2] tensor of (x, y) positions, given in the coordinate frame of the OUTPUT map
+      H: int, height of the OUTPUT map
+      W: int, width of the OUTPUT map
+
+    Returns
+      list([B, N, C_i]) sampled features at the 2D positions, one tensor per input scale
+ """
+
+ def __init__(self, mode="bilinear"):
+ super().__init__()
+ self.mode = mode
+ self.name = "HyperColumnFeatures"
+
+ def normgrid(self, x, H, W):
+ return 2.0 * (x / (torch.tensor([W - 1, H - 1], device=x.device, dtype=x.dtype))) - 1.0
+
+ def extract_values_at_poses(self, x, pos, H, W):
+ """Extract values from tensor x at the positions given by pos.
+
+ Args:
+ - x (Tensor): Tensor of size (C, H, W).
+ - pos (Tensor): Tensor of size (N, 2) containing the x, y positions.
+
+ Returns:
+ - values (Tensor): Tensor of size (N, C) with the values from f at the positions given by p.
+ """
+
+ # check if grid is float32
+ if x.dtype != torch.float32:
+ x = x.to(torch.float32)
+
+ grid = self.normgrid(pos, H, W).unsqueeze(-2)
+
+ x = F.grid_sample(x, grid, mode=self.mode, align_corners=True)
+ return x.permute(0, 2, 3, 1).squeeze(-2)
+
+ def forward(self, x, pos, H, W):
+ descs = []
+
+ for layer in x:
+ desc = self.extract_values_at_poses(layer, pos, H, W)
+ descs.append(desc)
+
+ return descs
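+
+
+# Note: the per-scale descriptors returned here are later concatenated along the channel
+# dimension (see RIPE.get_descs), which yields the "hypercolumn" feature per keypoint.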
diff --git a/imcui/third_party/RIPE/ripe/models/upsampler/interpolate_sparse2d.py b/imcui/third_party/RIPE/ripe/models/upsampler/interpolate_sparse2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..9da0ac56f14ddb497975baed70209d51f20e4821
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/models/upsampler/interpolate_sparse2d.py
@@ -0,0 +1,37 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+class InterpolateSparse2d(nn.Module):
+ """
+    Sample a feature map at N sparse 2D positions
+    Input
+      x: list([B, C_i, H_i, W_i]) feature tensors at different scales (e.g. from a U-Net encoder), ONLY the last one is used
+      pos: [B, N, 2] tensor of (x, y) positions, given in the coordinate frame of the OUTPUT map
+      H: int, height of the OUTPUT map
+      W: int, width of the OUTPUT map
+
+    Returns
+      list with a single [B, N, C] tensor of sampled features at the 2D positions
+ """
+
+ def __init__(self, mode="bicubic"):
+ super().__init__()
+ self.mode = mode
+ self.name = "InterpolateSparse2d"
+
+ def normgrid(self, x, H, W):
+ return 2.0 * (x / (torch.tensor([W - 1, H - 1], device=x.device, dtype=x.dtype))) - 1.0
+
+ def forward(self, x, pos, H, W):
+ x = x[-1] # only use the last layer
+
+ # check if grid is float32
+ if x.dtype != torch.float32:
+ x = x.to(torch.float32)
+
+ grid = self.normgrid(pos, H, W).unsqueeze(-2)
+
+ x = F.grid_sample(x, grid, mode=self.mode, align_corners=True)
+ return [x.permute(0, 2, 3, 1).squeeze(-2)]
diff --git a/imcui/third_party/RIPE/ripe/scheduler/__init__.py b/imcui/third_party/RIPE/ripe/scheduler/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/imcui/third_party/RIPE/ripe/scheduler/constant.py b/imcui/third_party/RIPE/ripe/scheduler/constant.py
new file mode 100644
index 0000000000000000000000000000000000000000..dcfee451da298cacbd2786255f9a5964e2d6b4ec
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/scheduler/constant.py
@@ -0,0 +1,6 @@
+class ConstantScheduler:
+ def __init__(self, value):
+ self.value = value
+
+ def __call__(self, step):
+ return self.value
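+
+
+# Illustrative example: ConstantScheduler(0.5)(step) returns 0.5 for every step. The
+# schedulers in this package are called once per training step in ripe/train.py
+# (e.g. for the alpha/beta/inlier-threshold schedules).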
diff --git a/imcui/third_party/RIPE/ripe/scheduler/expDecay.py b/imcui/third_party/RIPE/ripe/scheduler/expDecay.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a75316fc0669ed3bbe3291040179f54b3526fc4
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/scheduler/expDecay.py
@@ -0,0 +1,26 @@
+import numpy as np
+
+from ripe import utils
+
+log = utils.get_pylogger(__name__)
+
+
+class ExpDecay:
+ """Exponential decay scheduler.
+ args:
+ a: float, a + c = initial value
+ b: decay rate
+ c: float, final value
+
+ f(x) = a * e^(-b * x) + c
+ """
+
+ def __init__(self, a, b, c):
+ self.a = a
+ self.b = b
+ self.c = c
+
+ log.info(f"ExpDecay: a={a}, b={b}, c={c}")
+
+ def __call__(self, step):
+ return self.a * np.exp(-self.b * step) + self.c
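+
+
+# Illustrative example: ExpDecay(a=1.0, b=0.001, c=0.5) returns 1.5 at step 0 and
+# decays towards 0.5 as the step count grows.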
diff --git a/imcui/third_party/RIPE/ripe/scheduler/linearLR.py b/imcui/third_party/RIPE/ripe/scheduler/linearLR.py
new file mode 100644
index 0000000000000000000000000000000000000000..b035e4d2a0e8d7c9907b92f5d870f2c6afac5f27
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/scheduler/linearLR.py
@@ -0,0 +1,37 @@
+class StepLinearLR:
+ """Decay the learning rate by a linearly changing factor at each STEP (not epoch).
+
+ Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        steps_init (int): Step count to start from (0 for a fresh run).
+ num_steps (int): Total number of steps in the training process.
+ initial_lr (float): Initial learning rate.
+ final_lr (float): Final learning rate.
+ """
+
+ def __init__(self, optimizer, steps_init, num_steps, initial_lr, final_lr):
+ self.optimizer = optimizer
+ self.num_steps = num_steps
+ self.initial_lr = initial_lr
+ self.final_lr = final_lr
+ self.i_step = steps_init
+ self.decay_factor = (final_lr - initial_lr) / num_steps
+
+ def step(self):
+ """Decay the learning rate by decay_factor."""
+ self.i_step += 1
+
+ if self.i_step > self.num_steps:
+ return
+
+ lr = self.initial_lr + self.i_step * self.decay_factor
+ for param_group in self.optimizer.param_groups:
+ param_group["lr"] = lr
+
+ def get_lr(self):
+ return self.optimizer.param_groups[0]["lr"]
+
+ def get_last_lr(self):
+ return self.optimizer.param_groups[0]["lr"]
+
+ def get_step(self):
+ return self.i_step
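+
+
+# Illustrative example: StepLinearLR(opt, steps_init=0, num_steps=1000, initial_lr=1e-3,
+# final_lr=1e-5) moves the learning rate linearly from 1e-3 to 1e-5 over 1000 calls to
+# .step(); further calls leave the learning rate unchanged.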
diff --git a/imcui/third_party/RIPE/ripe/scheduler/linear_with_plateaus.py b/imcui/third_party/RIPE/ripe/scheduler/linear_with_plateaus.py
new file mode 100644
index 0000000000000000000000000000000000000000..92f8b4caae2934684a931415dc401701310ffb29
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/scheduler/linear_with_plateaus.py
@@ -0,0 +1,44 @@
+from ripe import utils
+
+log = utils.get_pylogger(__name__)
+
+
+class LinearWithPlateaus:
+ """Linear scheduler with plateaus.
+
+    Holds `start_val` for the first `plateau_start_steps` steps, then changes linearly
+    to `end_val`, and holds `end_val` for the final `plateau_end_steps` steps.
+ """
+
+ def __init__(
+ self,
+ start_val,
+ end_val,
+ steps_total,
+ rel_length_start_plateau=0.0,
+ rel_length_end_plateu=0.0,
+ ):
+ self.start_val = start_val
+ self.end_val = end_val
+ self.steps_total = steps_total
+ self.plateau_start_steps = steps_total * rel_length_start_plateau
+ self.plateau_end_steps = steps_total * rel_length_end_plateu
+
+ assert self.plateau_start_steps >= 0
+ assert self.plateau_end_steps >= 0
+ assert self.plateau_start_steps + self.plateau_end_steps <= self.steps_total
+
+ self.slope = (end_val - start_val) / (steps_total - self.plateau_start_steps - self.plateau_end_steps)
+
+ log.info(
+ f"LinearWithPlateaus: start_val={start_val}, end_val={end_val}, steps_total={steps_total}, "
+ f"plateau_start_steps={self.plateau_start_steps}, plateau_end_steps={self.plateau_end_steps}"
+ )
+
+ def __call__(self, step):
+ if step < self.plateau_start_steps:
+ return self.start_val
+ if step < self.steps_total - self.plateau_end_steps:
+ return self.start_val + self.slope * (step - self.plateau_start_steps)
+ return self.end_val
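+
+
+# Illustrative example: LinearWithPlateaus(0.0, 1.0, 1000, 0.1, 0.1) returns 0.0 for the
+# first 100 steps, rises linearly to 1.0 between steps 100 and 900, and returns 1.0 for
+# the remaining steps.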
diff --git a/imcui/third_party/RIPE/ripe/train.py b/imcui/third_party/RIPE/ripe/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9e3821834782bb5c46c7613075dd1b6233423dd
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/train.py
@@ -0,0 +1,410 @@
+import pyrootutils
+
+root = pyrootutils.setup_root(
+ search_from=__file__,
+ indicator=[".git", "pyproject.toml"],
+ pythonpath=True,
+ dotenv=True,
+)
+
+SEED = 32000
+
+import collections
+import os
+
+import hydra
+from hydra.utils import instantiate
+from lightning.fabric import Fabric
+
+print(SEED)
+import random
+
+os.environ["PYTHONHASHSEED"] = str(SEED)
+
+import numpy as np
+import torch
+import tqdm
+import wandb
+from torch.optim.adamw import AdamW
+from torch.utils.data import DataLoader
+
+from ripe import utils
+from ripe.benchmarks.imw_2020 import IMW_2020_Benchmark
+from ripe.utils.utils import get_rewards
+from ripe.utils.wandb_utils import get_flattened_wandb_cfg
+
+log = utils.get_pylogger(__name__)
+from pathlib import Path
+
+torch.manual_seed(SEED)
+np.random.seed(SEED)
+random.seed(SEED)
+
+
+def unpack_batch(batch):
+ src_image = batch["src_image"]
+ trg_image = batch["trg_image"]
+ trg_mask = batch["trg_mask"]
+ src_mask = batch["src_mask"]
+ label = batch["label"]
+ H = batch["homography"]
+
+ return src_image, trg_image, src_mask, trg_mask, H, label
+
+
+@hydra.main(config_path="../conf/", config_name="config", version_base=None)
+def train(cfg):
+ """Main training function for the RIPE model."""
+ # Prepare model, data and hyperparms
+
+ strategy = "ddp" if cfg.num_gpus > 1 else "auto"
+ fabric = Fabric(
+ accelerator="cuda",
+ devices=cfg.num_gpus,
+ precision=cfg.precision,
+ strategy=strategy,
+ )
+ fabric.launch()
+
+ output_dir = Path(cfg.output_dir)
+ experiment_name = output_dir.parent.parent.parent.name
+ run_id = output_dir.parent.parent.name
+ timestamp = output_dir.parent.name + "_" + output_dir.name
+
+ experiment_name = run_id + " " + timestamp + " " + experiment_name
+
+ # setup logger
+ wandb_logger = wandb.init(
+ project=cfg.project_name,
+ name=experiment_name,
+ config=get_flattened_wandb_cfg(cfg),
+ dir=cfg.output_dir,
+ mode=cfg.wandb_mode,
+ )
+
+ min_nums_matches = {"homography": 4, "fundamental": 8, "fundamental_7pt": 7}
+ min_num_matches = min_nums_matches[cfg.transformation_model]
+ print(f"Minimum number of matches for {cfg.transformation_model} is {min_num_matches}")
+
+ batch_size = cfg.batch_size
+ steps = cfg.num_steps
+ lr = cfg.lr
+
+    # gradient accumulation to simulate a larger batch size; set to 1 to disable
+    num_grad_accs = cfg.num_grad_accs
+
+ # instantiate dataset
+ ds = instantiate(cfg.data)
+
+ # prepare dataloader
+ dl = DataLoader(
+ ds,
+ batch_size=batch_size,
+ shuffle=True,
+ drop_last=True,
+ persistent_workers=False,
+ num_workers=cfg.num_workers,
+ )
+ dl = fabric.setup_dataloaders(dl)
+ i_dl = iter(dl)
+
+ # create matcher
+ matcher = instantiate(cfg.matcher)
+
+ if cfg.desc_loss_weight != 0.0:
+ descriptor_loss = instantiate(cfg.descriptor_loss)
+ else:
+ log.warning(
+ "Descriptor loss weight is 0.0, descriptor loss will not be used. 1x1 conv for descriptors will be deactivated!"
+ )
+ descriptor_loss = None
+
+ upsampler = instantiate(cfg.upsampler) if "upsampler" in cfg else None
+
+ # create network
+ net = instantiate(cfg.network)(
+ net=instantiate(cfg.backbones),
+ upsampler=upsampler,
+ descriptor_dim=cfg.descriptor_dim if descriptor_loss is not None else None,
+ device=fabric.device,
+ ).train()
+
+ # get num parameters
+ num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
+ log.info(f"Number of parameters: {num_params}")
+
+ fp_penalty = cfg.fp_penalty # small penalty for not finding a match
+ kp_penalty = cfg.kp_penalty # small penalty for low logprob keypoints
+
+ opt_pi = AdamW(filter(lambda x: x.requires_grad, net.parameters()), lr=lr, weight_decay=1e-5)
+ net, opt_pi = fabric.setup(net, opt_pi)
+
+ if cfg.lr_scheduler:
+ scheduler = instantiate(cfg.lr_scheduler)(optimizer=opt_pi, steps_init=0)
+ else:
+ scheduler = None
+
+ val_benchmark = IMW_2020_Benchmark(
+ use_predefined_subset=True,
+ conf_inference=cfg.conf_inference,
+ edge_input_divisible_by=None,
+ )
+
+    # moving average of skipped batches
+ # this is used to monitor how many batches were skipped due to not enough keypoints
+ # this is useful to detect if the model is not learning anything -> should be zero
+ ma_skipped_batches = collections.deque(maxlen=100)
+
+ opt_pi.zero_grad()
+
+ # initialize scheduler
+ alpha_scheduler = instantiate(cfg.alpha_scheduler)
+ beta_scheduler = instantiate(cfg.beta_scheduler)
+ inl_th_scheduler = instantiate(cfg.inl_th)
+
+ # ====== Training Loop ======
+ # check if the model is in training mode
+ net.train()
+
+ with tqdm.tqdm(total=steps) as pbar:
+ for i_step in range(steps):
+ alpha = alpha_scheduler(i_step)
+ beta = beta_scheduler(i_step)
+ inl_th = inl_th_scheduler(i_step)
+
+ if scheduler:
+ scheduler.step()
+
+ # Initialize vars for current step
+ # We need to handle batching because the description can have arbitrary number of keypoints
+ sum_reward_batch = 0
+ sum_num_keypoints_1 = 0
+ sum_num_keypoints_2 = 0
+ loss = None
+ loss_policy_stack = None
+ loss_desc_stack = None
+ loss_kp_stack = None
+
+ try:
+ batch = next(i_dl)
+ except StopIteration:
+ i_dl = iter(dl)
+ batch = next(i_dl)
+
+ p1, p2, mask_padding_1, mask_padding_2, Hs, label = unpack_batch(batch)
+
+ (
+ kpts1,
+ logprobs1,
+ selected_mask1,
+ mask_padding_grid_1,
+ logits_selected_1,
+ out1,
+ ) = net(p1, mask_padding_1, training=True)
+ (
+ kpts2,
+ logprobs2,
+ selected_mask2,
+ mask_padding_grid_2,
+ logits_selected_2,
+ out2,
+ ) = net(p2, mask_padding_2, training=True)
+
+ # upsample coarse descriptors for all keypoints from the intermediate feature maps from the encoder
+ desc_1 = net.get_descs(out1["coarse_descs"], p1, kpts1, p1.shape[2], p1.shape[3])
+ desc_2 = net.get_descs(out2["coarse_descs"], p2, kpts2, p2.shape[2], p2.shape[3])
+
+ if cfg.padding_filter_mode == "ignore": # remove keypoints that are in padding
+ batch_mask_selection_for_matching_1 = selected_mask1 & mask_padding_grid_1
+ batch_mask_selection_for_matching_2 = selected_mask2 & mask_padding_grid_2
+ elif cfg.padding_filter_mode == "punish":
+ batch_mask_selection_for_matching_1 = selected_mask1 # keep all keypoints
+ batch_mask_selection_for_matching_2 = selected_mask2 # punish the keypoints in the padding area
+ else:
+ raise ValueError(f"Unknown padding filter mode: {cfg.padding_filter_mode}")
+
+ (
+ batch_rel_idx_matches,
+ batch_abs_idx_matches,
+ batch_ransac_inliers,
+ batch_Fm,
+ ) = matcher(
+ kpts1,
+ kpts2,
+ desc_1,
+ desc_2,
+ batch_mask_selection_for_matching_1,
+ batch_mask_selection_for_matching_2,
+ inl_th,
+ label if cfg.no_filtering_negatives else None,
+ )
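+            # per image pair: relative/absolute indices of the mutual matches, the RANSAC
+            # inlier mask and the estimated two-view model; entries are None if too few
+            # keypoints were detected for that pair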
+
+ for b in range(batch_size):
+ # ignore if less than 16 keypoints have been detected
+ if batch_rel_idx_matches[b] is None:
+ ma_skipped_batches.append(1)
+ continue
+ else:
+ ma_skipped_batches.append(0)
+
+ mask_selection_for_matching_1 = batch_mask_selection_for_matching_1[b]
+ mask_selection_for_matching_2 = batch_mask_selection_for_matching_2[b]
+
+ rel_idx_matches = batch_rel_idx_matches[b]
+ abs_idx_matches = batch_abs_idx_matches[b]
+ ransac_inliers = batch_ransac_inliers[b]
+
+ if cfg.selected_only:
+ # every SELECTED keypoint with every other SELECTED keypoint
+ dense_logprobs = logprobs1[b][mask_selection_for_matching_1].view(-1, 1) + logprobs2[b][
+ mask_selection_for_matching_2
+ ].view(1, -1)
+ else:
+ if cfg.padding_filter_mode == "ignore":
+ # every keypoint with every other keypoint, but WITHOUT keypoint in the padding area
+ dense_logprobs = logprobs1[b][mask_padding_grid_1[b]].view(-1, 1) + logprobs2[b][
+ mask_padding_grid_2[b]
+ ].view(1, -1)
+ elif cfg.padding_filter_mode == "punish":
+ # every keypoint with every other keypoint, also WITH keypoints in the padding areas -> will be punished by the reward
+ dense_logprobs = logprobs1[b].view(-1, 1) + logprobs2[b].view(1, -1)
+ else:
+ raise ValueError(f"Unknown padding filter mode: {cfg.padding_filter_mode}")
+
+ reward = None
+
+ if cfg.reward_type == "inlier":
+ reward = (
+ 0.5 if cfg.no_filtering_negatives and not label[b] else 1.0
+ ) # reward is 1.0 if the pair is positive, 0.5 if negative and no filtering is applied
+ elif cfg.reward_type == "inlier_ratio":
+ ratio_inlier = ransac_inliers.sum() / len(abs_idx_matches)
+ reward = ratio_inlier # reward is the ratio of inliers -> higher if more matches are inliers
+ elif cfg.reward_type == "inlier+inlier_ratio":
+ ratio_inlier = ransac_inliers.sum() / len(abs_idx_matches)
+ reward = (
+ (1.0 - beta) * 1.0 + beta * ratio_inlier
+ ) # reward is a combination of the ratio of inliers and the number of inliers -> gradually changes
+ else:
+ raise ValueError(f"Unknown reward type: {cfg.reward_type}")
+
+ dense_rewards = get_rewards(
+ reward,
+ kpts1[b],
+ kpts2[b],
+ mask_selection_for_matching_1,
+ mask_selection_for_matching_2,
+ mask_padding_grid_1[b],
+ mask_padding_grid_2[b],
+ rel_idx_matches,
+ abs_idx_matches,
+ ransac_inliers,
+ label[b],
+ fp_penalty * alpha,
+ use_whitening=cfg.use_whitening,
+ selected_only=cfg.selected_only,
+ filter_mode=cfg.padding_filter_mode,
+ )
+
+ if descriptor_loss is not None:
+ hard_loss = descriptor_loss(
+ desc1=desc_1[b],
+ desc2=desc_2[b],
+ matches=abs_idx_matches,
+ inliers=ransac_inliers,
+ label=label[b],
+ logits_1=None,
+ logits_2=None,
+ )
+ loss_desc_stack = (
+ hard_loss if loss_desc_stack is None else torch.hstack((loss_desc_stack, hard_loss))
+ )
+
+ sum_reward_batch += dense_rewards.sum()
+
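+                # REINFORCE-style surrogate: rewards weight the summed log-probabilities of
+                # keypoint pairs; the sign is flipped further below so that minimizing the
+                # loss maximizes the expected reward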
+ current_loss_policy = (dense_rewards * dense_logprobs).view(-1)
+
+ loss_policy_stack = (
+ current_loss_policy
+ if loss_policy_stack is None
+ else torch.hstack((loss_policy_stack, current_loss_policy))
+ )
+
+ if kp_penalty != 0.0:
+ # keypoints with low logprob are penalized
+                    # since their logprobs are strongly negative, multiplying them by the penalty increases the loss
+ loss_kp = (
+ logprobs1[b][mask_selection_for_matching_1]
+ * torch.full_like(
+ logprobs1[b][mask_selection_for_matching_1],
+ kp_penalty * alpha,
+ )
+ ).mean() + (
+ logprobs2[b][mask_selection_for_matching_2]
+ * torch.full_like(
+ logprobs2[b][mask_selection_for_matching_2],
+ kp_penalty * alpha,
+ )
+ ).mean()
+ loss_kp_stack = loss_kp if loss_kp_stack is None else torch.hstack((loss_kp_stack, loss_kp))
+
+ sum_num_keypoints_1 += mask_selection_for_matching_1.sum()
+ sum_num_keypoints_2 += mask_selection_for_matching_2.sum()
+
+ loss = loss_policy_stack.mean()
+ if loss_kp_stack is not None:
+ loss += loss_kp_stack.mean()
+
+ loss = -loss
+
+ if descriptor_loss is not None:
+ loss += cfg.desc_loss_weight * loss_desc_stack.mean()
+
+ pbar.set_description(
+ f"LP: {loss.item():.4f} - Det: ({sum_num_keypoints_1 / batch_size:.4f}, {sum_num_keypoints_2 / batch_size:.4f}), #mRwd: {sum_reward_batch / batch_size:.1f}"
+ )
+ pbar.update()
+
+ # backward pass
+ loss /= num_grad_accs
+ fabric.backward(loss)
+
+ if i_step % num_grad_accs == 0:
+ opt_pi.step()
+ opt_pi.zero_grad()
+
+ if i_step % cfg.log_interval == 0:
+ wandb_logger.log(
+ {
+ # "loss": loss.item() if not use_amp else scaled_loss.item(),
+ "loss": loss.item(),
+ "loss_policy": -loss_policy_stack.mean().item(),
+ "loss_kp": loss_kp_stack.mean().item() if loss_kp_stack is not None else 0.0,
+ "loss_hard": (loss_desc_stack.mean().item() if loss_desc_stack is not None else 0.0),
+ "mean_num_det_kpts1": sum_num_keypoints_1 / batch_size,
+ "mean_num_det_kpts2": sum_num_keypoints_2 / batch_size,
+ "mean_reward": sum_reward_batch / batch_size,
+ "lr": opt_pi.param_groups[0]["lr"],
+ "ma_skipped_batches": sum(ma_skipped_batches) / len(ma_skipped_batches),
+ "inl_th": inl_th,
+ },
+ step=i_step,
+ )
+
+ if i_step % cfg.val_interval == 0:
+ val_benchmark.evaluate(net, fabric.device, progress_bar=False)
+ val_benchmark.log_results(logger=wandb_logger, step=i_step)
+
+ # ensure that the model is in training mode again
+ net.train()
+
+ # save the model
+ torch.save(
+ net.state_dict(),
+ output_dir / ("model" + "_" + str(i_step + 1) + "_final" + ".pth"),
+ )
+
+
+if __name__ == "__main__":
+ train()
diff --git a/imcui/third_party/RIPE/ripe/utils/__init__.py b/imcui/third_party/RIPE/ripe/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..282a3539f626c0c25f00a6fd1adf45e9b7e10a5a
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/utils/__init__.py
@@ -0,0 +1,2 @@
+# from x_dd.utils import loggers
+from ripe.utils.pylogger import get_pylogger # noqa: F401
diff --git a/imcui/third_party/RIPE/ripe/utils/image_utils.py b/imcui/third_party/RIPE/ripe/utils/image_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e10595fbedd945563b602aa03107f98426537f09
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/utils/image_utils.py
@@ -0,0 +1,62 @@
+import h5py
+import numpy as np
+import torch
+
+
+class Camera:
+ def __init__(self, K, R, t):
+ self.K = K
+ self.R = R
+ self.t = t
+
+ @classmethod
+ def from_calibration_file(cls, path: str):
+ with h5py.File(path, "r") as f:
+ K = torch.tensor(np.array(f["K"]), dtype=torch.float32)
+ R = torch.tensor(np.array(f["R"]), dtype=torch.float32)
+ T = torch.tensor(np.array(f["T"]), dtype=torch.float32)
+
+ return cls(K, R, T)
+
+ @property
+ def K_inv(self):
+ return self.K.inverse()
+
+ def to_cameradict(self):
+ fx = self.K[0, 0].item()
+ fy = self.K[1, 1].item()
+ cx = self.K[0, 2].item()
+ cy = self.K[1, 2].item()
+
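+        # width/height are reconstructed from the principal point, assuming it lies at
+        # the image center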
+ params = {
+ "model": "PINHOLE",
+ "width": int(cx * 2),
+ "height": int(cy * 2),
+ "params": [fx, fy, cx, cy],
+ }
+
+ return params
+
+ def __repr__(self):
+ return f"ImageData(K={self.K}, R={self.R}, t={self.t})"
+
+
+def cameras2F(cam1: Camera, cam2: Camera) -> torch.Tensor:
+ E = cameras2E(cam1, cam2)
+ return cam2.K_inv.T @ E @ cam1.K_inv
+
+
+def cameras2E(cam1: Camera, cam2: Camera) -> torch.Tensor:
+ R = cam2.R @ cam1.R.T
+ T = cam2.t - R @ cam1.t
+ return cross_product_matrix(T) @ R
+
+
+def cross_product_matrix(v) -> torch.Tensor:
+ """Following en.wikipedia.org/wiki/Cross_product#Conversion_to_matrix_multiplication."""
+
+ return torch.tensor(
+ [[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]],
+ dtype=v.dtype,
+ device=v.device,
+ )
diff --git a/imcui/third_party/RIPE/ripe/utils/pose_error.py b/imcui/third_party/RIPE/ripe/utils/pose_error.py
new file mode 100644
index 0000000000000000000000000000000000000000..73bd68443fa4fc93f6f663a38bd5020c3a33bdf8
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/utils/pose_error.py
@@ -0,0 +1,62 @@
+# mostly from: https://github.com/cvg/glue-factory/blob/main/gluefactory/geometry/epipolar.py
+
+import numpy as np
+import torch
+
+
+def angle_error_mat(R1, R2):
+ cos = (torch.trace(torch.einsum("...ij, ...jk -> ...ik", R1.T, R2)) - 1) / 2
+ cos = torch.clip(cos, -1.0, 1.0) # numerical errors can make it out of bounds
+ return torch.rad2deg(torch.abs(torch.arccos(cos)))
+
+
+def angle_error_vec(v1, v2, eps=1e-10):
+ n = torch.clip(v1.norm(dim=-1) * v2.norm(dim=-1), min=eps)
+ v1v2 = (v1 * v2).sum(dim=-1) # dot product in the last dimension
+ return torch.rad2deg(torch.arccos(torch.clip(v1v2 / n, -1.0, 1.0)))
+
+
+def relative_pose_error(R_gt, t_gt, R, t, ignore_gt_t_thr=0.0, eps=1e-10):
+ # angle error between 2 vectors
+ t_err = angle_error_vec(t, t_gt, eps)
+ t_err = torch.minimum(t_err, 180 - t_err) # handle E ambiguity
+ if t_gt.norm() < ignore_gt_t_thr: # pure rotation is challenging
+ t_err = torch.zeros_like(t_err)
+
+ # angle error between 2 rotation matrices
+ r_err = angle_error_mat(R, R_gt)
+
+ return t_err, r_err
+
+
+def cal_error_auc(errors, thresholds):
+ sort_idx = np.argsort(errors)
+ errors = np.array(errors.copy())[sort_idx]
+ recall = (np.arange(len(errors)) + 1) / len(errors)
+ errors = np.r_[0.0, errors]
+ recall = np.r_[0.0, recall]
+ aucs = []
+ for t in thresholds:
+ last_index = np.searchsorted(errors, t)
+ r = np.r_[recall[:last_index], recall[last_index - 1]]
+ e = np.r_[errors[:last_index], t]
+ aucs.append(np.round((np.trapz(r, x=e) / t), 4))
+ return aucs
+
+
+class AUCMetric:
+ def __init__(self, thresholds, elements=None):
+ self._elements = elements
+ self.thresholds = thresholds
+ if not isinstance(thresholds, list):
+ self.thresholds = [thresholds]
+
+ def update(self, tensor):
+ assert tensor.dim() == 1
+ self._elements += tensor.cpu().numpy().tolist()
+
+ def compute(self):
+ if len(self._elements) == 0:
+ return np.nan
+ else:
+ return cal_error_auc(self._elements, self.thresholds)
diff --git a/imcui/third_party/RIPE/ripe/utils/pylogger.py b/imcui/third_party/RIPE/ripe/utils/pylogger.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ca744ce56fadae1cc47395cbce8930ca0fa59c7
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/utils/pylogger.py
@@ -0,0 +1,32 @@
+import logging
+
+# from pytorch_lightning.utilities import rank_zero_only
+
+
+def init_base_pylogger():
+ """Initializes base python command line logger."""
+
+ logging.basicConfig(
+ level=logging.WARNING,
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ handlers=[logging.StreamHandler()],
+ )
+
+
+def get_pylogger(name=__name__) -> logging.Logger:
+ """Initializes multi-GPU-friendly python command line logger."""
+
+ if not logging.root.handlers:
+ init_base_pylogger()
+
+ logger = logging.getLogger(name)
+
+ logger.setLevel(logging.DEBUG)
+
+ # this ensures all logging levels get marked with the rank zero decorator
+ # otherwise logs would get multiplied for each GPU process in multi-GPU setup
+ # logging_levels = ("debug", "info", "warning", "error", "exception", "fatal", "critical")
+ # for level in logging_levels:
+ # setattr(logger, level, rank_zero_only(getattr(logger, level)))
+
+ return logger
diff --git a/imcui/third_party/RIPE/ripe/utils/utils.py b/imcui/third_party/RIPE/ripe/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5033f9b90089b98126fb31dd545fb073c0ac0f87
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/utils/utils.py
@@ -0,0 +1,192 @@
+import random
+from typing import List
+
+import cv2
+import numpy as np
+import torch
+from torchvision.transforms.functional import resize
+
+from ripe import utils
+
+log = utils.get_pylogger(__name__)
+
+
+def gridify(x, window_size):
+ """Turn a tensor of BxCxHxW into a tensor of
+ BxCx(H//window_size)x(W//window_size)x(window_size**2)
+
+ Params:
+ x: Input tensor of shape BxCxHxW
+ window_size: Size of the window
+
+ Returns:
+ x: Output tensor of shape BxCx(H//window_size)x(W//window_size)x(window_size**2)
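+
+    Example (illustrative): a (B, C, 32, 32) tensor with window_size=8 becomes
+    (B, C, 4, 4, 64), i.e. one flattened 8x8 window per grid cell.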
+ """
+
+ assert x.dim() == 4, "Input tensor x must have 4 dimensions"
+
+ B, C, H, W = x.shape
+ x = (
+ x.unfold(2, window_size, window_size)
+ .unfold(3, window_size, window_size)
+ .reshape(B, C, H // window_size, W // window_size, window_size**2)
+ )
+
+ return x
+
+
+def get_grid(B, H, W, device):
+ x1_n = torch.meshgrid(*[torch.linspace(-1 + 1 / n, 1 - 1 / n, n, device=device) for n in (B, H, W)])
+ x1_n = torch.stack((x1_n[2], x1_n[1]), dim=-1).reshape(B, H * W, 2)
+ return x1_n
+
+
+def cv2_matches_from_kornia(match_dists: torch.Tensor, match_idxs: torch.Tensor) -> List[cv2.DMatch]:
+ return [cv2.DMatch(idx[0].item(), idx[1].item(), d.item()) for idx, d in zip(match_idxs, match_dists)]
+
+
+def to_cv_kpts(kpts, scores):
+ kp = kpts.cpu().numpy().astype(np.int16)
+ s = scores.cpu().numpy()
+
+ cv_kp = [cv2.KeyPoint(kp[i][0], kp[i][1], 6, 0, s[i]) for i in range(len(kp))]
+
+ return cv_kp
+
+
+def resize_image(image, min_size=512, max_size=768):
+ """Resize image to a new size while maintaining the aspect ratio.
+
+ Params:
+ image (torch.tensor): Image to be resized.
+ min_size (int): Minimum size of the smaller dimension.
+ max_size (int): Maximum size of the larger dimension.
+
+ Returns:
+ image: Resized image.
+ """
+
+ h, w = image.shape[-2:]
+
+ aspect_ratio = w / h
+
+ if w > h:
+ new_w = max(min_size, min(max_size, w))
+ new_h = int(new_w / aspect_ratio)
+ else:
+ new_h = max(min_size, min(max_size, h))
+ new_w = int(new_h * aspect_ratio)
+
+ new_size = (new_h, new_w)
+
+ image = resize(image, new_size)
+
+ return image
+
+
+def get_rewards(
+ reward,
+ kps1,
+ kps2,
+ selected_mask1,
+ selected_mask2,
+ padding_mask1,
+ padding_mask2,
+ rel_idx_matches,
+ abs_idx_matches,
+ ransac_inliers,
+ label,
+ penalty=0.0,
+ use_whitening=False,
+ selected_only=False,
+ filter_mode=None,
+):
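+    # Builds a dense (num_kpts1 x num_kpts2) reward matrix for the policy-gradient update:
+    # RANSAC-inlier matches receive `reward` (sign-flipped for negative pairs), all other
+    # entries receive the small `penalty`, and the matrix can optionally be whitened.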
+ with torch.no_grad():
+ reward *= 1.0 if label else -1.0
+
+ dense_returns = torch.zeros((len(kps1), len(kps2)), device=kps1.device)
+
+ if filter_mode == "ignore":
+ dense_returns[
+ abs_idx_matches[:, 0][ransac_inliers],
+ abs_idx_matches[:, 1][ransac_inliers],
+ ] = reward
+ elif filter_mode == "punish":
+ in_padding_area = (
+ padding_mask1[abs_idx_matches[:, 0]] & padding_mask2[abs_idx_matches[:, 1]]
+            )  # True where both keypoints lie in the valid image area (padding masks are True for non-padded pixels)
+
+ dense_returns[
+ abs_idx_matches[:, 0][ransac_inliers & in_padding_area],
+ abs_idx_matches[:, 1][ransac_inliers & in_padding_area],
+ ] = reward
+ dense_returns[
+ abs_idx_matches[:, 0][ransac_inliers & ~in_padding_area],
+ abs_idx_matches[:, 1][ransac_inliers & ~in_padding_area],
+ ] = -1.0
+ else:
+ raise ValueError(f"Unknown filter mode: {filter_mode}")
+
+ if selected_only:
+ dense_returns = dense_returns[selected_mask1, :][:, selected_mask2]
+ if filter_mode == "ignore" and not selected_only:
+ dense_returns = dense_returns[padding_mask1, :][:, padding_mask2]
+
+ if penalty != 0.0:
+ # pos. pair: small penalty for not finding a match
+ # neg. pair: small reward for not finding a match
+ penalty_val = penalty if label else -penalty
+
+ dense_returns[dense_returns == 0.0] = penalty_val
+
+ if use_whitening:
+ dense_returns = (dense_returns - dense_returns.mean()) / (dense_returns.std() + 1e-6)
+
+ return dense_returns
+
+
+def get_other_random_id(idx: int, len_dataset: int, min_dist: int = 20):
+ for _ in range(10):
+ tgt_id = random.randint(0, len_dataset - 1)
+ if abs(idx - tgt_id) >= min_dist:
+ return tgt_id
+
+ raise ValueError(f"Could not find target image with distance >= {min_dist} from source image {idx}")
+
+
+def cv_resize_and_pad_to_shape(image, new_shape, padding_color=(0, 0, 0)):
+ """Resizes image to new_shape with maintaining the aspect ratio and pads with padding_color if
+ needed.
+
+ Params:
+ image: Image to be resized.
+ new_shape: Expected (height, width) of new image.
+ padding_color: Tuple in BGR of padding color
+ Returns:
+ image: Resized image with padding
+ """
+ h, w = image.shape[:2]
+
+ scale_h = new_shape[0] / h
+ scale_w = new_shape[1] / w
+
+ scale = None
+ if scale_w * h > new_shape[0]:
+ scale = scale_h
+ elif scale_h * w > new_shape[1]:
+ scale = scale_w
+ else:
+ scale = max(scale_h, scale_w)
+
+ new_w, new_h = int(round(w * scale)), int(round(h * scale))
+
+ image = cv2.resize(image, (new_w, new_h))
+
+ missing_h = new_shape[0] - new_h
+ missing_w = new_shape[1] - new_w
+
+ top, bottom = missing_h // 2, missing_h - (missing_h // 2)
+ left, right = missing_w // 2, missing_w - (missing_w // 2)
+
+ image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=padding_color)
+ return image
diff --git a/imcui/third_party/RIPE/ripe/utils/wandb_utils.py b/imcui/third_party/RIPE/ripe/utils/wandb_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4365a3ce241c1966302b183553f85b45fe1bf75a
--- /dev/null
+++ b/imcui/third_party/RIPE/ripe/utils/wandb_utils.py
@@ -0,0 +1,16 @@
+import omegaconf
+
+
+def get_flattened_wandb_cfg(conf_dict):
+ flattened = {}
+
+ def _flatten(cfg, prefix=""):
+ for k, v in cfg.items():
+ new_key = f"{prefix}.{k}" if prefix else k
+ if isinstance(v, omegaconf.dictconfig.DictConfig):
+ _flatten(v, new_key)
+ else:
+ flattened[new_key] = v
+
+ _flatten(conf_dict)
+ return flattened
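+
+
+# Illustrative example: a nested OmegaConf config equivalent to {"model": {"lr": 1e-4}}
+# is flattened to {"model.lr": 1e-4}, so nested hydra settings show up as dotted keys in
+# the wandb run config.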