Realcat committed e6ac593 (1 parent: 1b369eb)

Note: this view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.

Files changed (50):
  1. README.md +2 -1
  2. config/config.yaml +13 -2
  3. imcui/hloc/extract_features.py +11 -0
  4. imcui/hloc/extractors/ripe.py +46 -0
  5. imcui/third_party/RIPE/.gitignore +179 -0
  6. imcui/third_party/RIPE/LICENSE +35 -0
  7. imcui/third_party/RIPE/LICENSE_DALF_DISK +201 -0
  8. imcui/third_party/RIPE/README.md +367 -0
  9. imcui/third_party/RIPE/app.py +272 -0
  10. imcui/third_party/RIPE/assets/all_souls_000013.jpg +3 -0
  11. imcui/third_party/RIPE/assets/all_souls_000055.jpg +3 -0
  12. imcui/third_party/RIPE/assets/teaser_image.png +3 -0
  13. imcui/third_party/RIPE/conda_env.yml +26 -0
  14. imcui/third_party/RIPE/conf/backbones/resnet.yaml +6 -0
  15. imcui/third_party/RIPE/conf/backbones/vgg.yaml +5 -0
  16. imcui/third_party/RIPE/conf/data/disk_megadepth.yaml +12 -0
  17. imcui/third_party/RIPE/conf/data/megadepth+acdc.yaml +33 -0
  18. imcui/third_party/RIPE/conf/data/megadepth+tokyo.yaml +29 -0
  19. imcui/third_party/RIPE/conf/descriptor_loss/contrastive_loss.yaml +3 -0
  20. imcui/third_party/RIPE/conf/inl_th/constant.yaml +2 -0
  21. imcui/third_party/RIPE/conf/inl_th/exp_decay.yaml +4 -0
  22. imcui/third_party/RIPE/conf/matcher/concurrent_mnn_poselib.yaml +8 -0
  23. imcui/third_party/RIPE/conf/train.yaml +89 -0
  24. imcui/third_party/RIPE/conf/upsampler/hypercolumn_features.yaml +2 -0
  25. imcui/third_party/RIPE/conf/upsampler/interpolate_sparse2D.yaml +1 -0
  26. imcui/third_party/RIPE/data/download_disk_data.sh +43 -0
  27. imcui/third_party/RIPE/demo.py +51 -0
  28. imcui/third_party/RIPE/ripe/__init__.py +1 -0
  29. imcui/third_party/RIPE/ripe/benchmarks/imw_2020.py +320 -0
  30. imcui/third_party/RIPE/ripe/data/__init__.py +0 -0
  31. imcui/third_party/RIPE/ripe/data/data_transforms.py +204 -0
  32. imcui/third_party/RIPE/ripe/data/datasets/__init__.py +0 -0
  33. imcui/third_party/RIPE/ripe/data/datasets/acdc.py +154 -0
  34. imcui/third_party/RIPE/ripe/data/datasets/dataset_combinator.py +88 -0
  35. imcui/third_party/RIPE/ripe/data/datasets/disk_imw.py +160 -0
  36. imcui/third_party/RIPE/ripe/data/datasets/disk_megadepth.py +157 -0
  37. imcui/third_party/RIPE/ripe/data/datasets/tokyo247.py +134 -0
  38. imcui/third_party/RIPE/ripe/losses/__init__.py +0 -0
  39. imcui/third_party/RIPE/ripe/losses/contrastive_loss.py +88 -0
  40. imcui/third_party/RIPE/ripe/matcher/__init__.py +0 -0
  41. imcui/third_party/RIPE/ripe/matcher/concurrent_matcher.py +97 -0
  42. imcui/third_party/RIPE/ripe/matcher/pose_estimator_poselib.py +31 -0
  43. imcui/third_party/RIPE/ripe/model_zoo/__init__.py +1 -0
  44. imcui/third_party/RIPE/ripe/model_zoo/vgg_hyper.py +39 -0
  45. imcui/third_party/RIPE/ripe/models/__init__.py +0 -0
  46. imcui/third_party/RIPE/ripe/models/backbones/__init__.py +0 -0
  47. imcui/third_party/RIPE/ripe/models/backbones/backbone_base.py +61 -0
  48. imcui/third_party/RIPE/ripe/models/backbones/vgg.py +99 -0
  49. imcui/third_party/RIPE/ripe/models/backbones/vgg_utils.py +143 -0
  50. imcui/third_party/RIPE/ripe/models/ripe.py +303 -0
README.md CHANGED
@@ -44,8 +44,9 @@ The tool currently supports various popular image matching algorithms, namely:
 
 | Algorithm | Supported | Conference/Journal | Year | GitHub Link |
 |------------------|-----------|--------------------|------|-------------|
-| LiftFeat | ✅ | ICRA | 2025 | [Link](https://github.com/lyp-deeplearning/LiftFeat) |
+| RIPE | ✅ | ICCV | 2025 | [Link](https://github.com/fraunhoferhhi/RIPE) |
 | RDD | ✅ | CVPR | 2025 | [Link](https://github.com/xtcpete/rdd) |
+| LiftFeat | ✅ | ICRA | 2025 | [Link](https://github.com/lyp-deeplearning/LiftFeat) |
 | DaD | ✅ | ARXIV | 2025 | [Link](https://github.com/Parskatt/dad) |
 | MINIMA | ✅ | ARXIV | 2024 | [Link](https://github.com/LSXI7/MINIMA) |
 | XoFTR | ✅ | CVPR | 2024 | [Link](https://github.com/OnderT/XoFTR) |
config/config.yaml CHANGED
@@ -267,6 +267,17 @@ matcher_zoo:
       paper: https://arxiv.org/abs/2505.0342
       project: null
       display: true
+  ripe(+mnn):
+    matcher: NN-mutual
+    feature: ripe
+    dense: false
+    info:
+      name: RIPE #dispaly name
+      source: "ICCV 2025"
+      github: https://github.com/fraunhoferhhi/RIPE
+      paper: https://arxiv.org/abs/2507.04839
+      project: https://fraunhoferhhi.github.io/RIPE
+      display: true
   rdd(sparse):
     matcher: NN-mutual
     feature: rdd
@@ -274,7 +285,7 @@ matcher_zoo:
     info:
       name: RDD(sparse) #dispaly name
       source: "CVPR 2025"
-      github: hhttps://github.com/xtcpete/rdd
+      github: https://github.com/xtcpete/rdd
       paper: https://arxiv.org/abs/2505.08013
       project: https://xtcpete.github.io/rdd
       display: true
@@ -284,7 +295,7 @@ matcher_zoo:
     info:
       name: RDD(dense) #dispaly name
       source: "CVPR 2025"
-      github: hhttps://github.com/xtcpete/rdd
+      github: https://github.com/xtcpete/rdd
       paper: https://arxiv.org/abs/2505.08013
       project: https://xtcpete.github.io/rdd
       display: true
imcui/hloc/extract_features.py CHANGED
@@ -236,6 +236,17 @@ confs = {
             "resize_max": 1600,
         },
     },
+    "ripe": {
+        "output": "feats-ripe-n2048-r1600",
+        "model": {
+            "name": "ripe",
+            "max_keypoints": 2048,
+        },
+        "preprocessing": {
+            "grayscale": False,
+            "resize_max": 1600,
+        },
+    },
     "aliked-n16-rot": {
         "output": "feats-aliked-n16-rot",
         "model": {
imcui/hloc/extractors/ripe.py ADDED
@@ -0,0 +1,46 @@
+import sys
+from pathlib import Path
+from ..utils.base_model import BaseModel
+from .. import logger, MODEL_REPO_ID
+
+ripe_path = Path(__file__).parent / "../../third_party/RIPE"
+sys.path.append(str(ripe_path))
+
+from ripe import vgg_hyper
+
+
+class RIPE(BaseModel):
+    default_conf = {
+        "keypoint_threshold": 0.05,
+        "max_keypoints": 5000,
+        "model_name": "weights_ripe.pth",
+    }
+
+    required_inputs = ["image"]
+
+    def _init(self, conf):
+        logger.info("Loading RIPE model...")
+        model_path = self._download_model(
+            repo_id=MODEL_REPO_ID,
+            filename="{}/{}".format(Path(__file__).stem, self.conf["model_name"]),
+        )
+        self.net = vgg_hyper(Path(model_path))
+        logger.info("Loading RIPE model done!")
+
+    def _forward(self, data):
+        keypoints, descriptors, scores = self.net.detectAndCompute(
+            data["image"], threshold=0.5, top_k=2048
+        )
+
+        if self.conf["max_keypoints"] < len(keypoints):
+            idxs = scores.argsort()[-self.conf["max_keypoints"] or None :]
+            keypoints = keypoints[idxs, :2]
+            descriptors = descriptors[idxs]
+            scores = scores[idxs]
+
+        pred = {
+            "keypoints": keypoints[None],
+            "descriptors": descriptors[None].permute(0, 2, 1),
+            "scores": scores[None],
+        }
+        return pred
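The top-k selection in `_forward` leans on a compact slicing idiom. A standalone sketch with made-up values (not from the repository) spells out what it does:

```python
import torch

scores = torch.rand(5000)  # hypothetical detection scores
max_keypoints = 2048

# argsort() orders indices from lowest to highest score, so the last
# `max_keypoints` entries point at the highest-scoring keypoints.
# `-max_keypoints or None` degrades to None (keep everything) when
# max_keypoints is 0, mirroring the expression used in the extractor.
idxs = scores.argsort()[-max_keypoints or None :]
assert idxs.shape[0] == max_keypoints
```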
imcui/third_party/RIPE/.gitignore ADDED
@@ -0,0 +1,179 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .venv
106
+ env/
107
+ venv/
108
+ ENV/
109
+ env.bak/
110
+ venv.bak/
111
+
112
+ # Spyder project settings
113
+ .spyderproject
114
+ .spyproject
115
+
116
+ # Rope project settings
117
+ .ropeproject
118
+
119
+ # mkdocs documentation
120
+ /site
121
+
122
+ # mypy
123
+ .mypy_cache/
124
+ .dmypy.json
125
+ dmypy.json
126
+
127
+ # Pyre type checker
128
+ .pyre/
129
+
130
+ ### VisualStudioCode
131
+ .vscode/*
132
+ !.vscode/settings.json
133
+ !.vscode/tasks.json
134
+ !.vscode/launch.json
135
+ !.vscode/extensions.json
136
+ *.code-workspace
137
+ **/.vscode
138
+
139
+ # JetBrains
140
+ .idea/
141
+
142
+ # ignore outputs
143
+ /outputs/
144
+
145
+ # ignore logs
146
+ /logs/
147
+ tmp.py
148
+ .env
149
+
150
+ # ignore pretrained pytorch models
151
+ *.pth
152
+
153
+ # ignore lightning_logs
154
+ /lightning_logs/*
155
+
156
+ # ignore built apptainer images
157
+ *.sif
158
+
159
+ # ignore the outputs server on the cluster
160
+ /output/*
161
+ # ignore .out files generated from the cluster
162
+ *.out
163
+ # ignore hparams_search folder
164
+ /hparams_search_configs/*
165
+
166
+ *.o
167
+ *.pkl
168
+ *.ninja_deps
169
+ *.ninja_log
170
+ *.ninja
171
+
172
+ /misc/*
173
+ /tmp/*
174
+ /apptainer_env.box/*
175
+ /scripts/tmp_build/*
176
+ /checkpoints
177
+ /pretrained_weights
178
+ /results_supple_cvpr
179
+ /ext_files
imcui/third_party/RIPE/LICENSE ADDED
@@ -0,0 +1,35 @@
1
+ Software Copyright License for Academic Use of RIPE, Version 2.0
2
+
3
+ © Copyright (2025) Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
4
+
5
+ 1. INTRODUCTION
6
+
7
+ RIPE which means any source code, object code or binary files provided by Fraunhofer excluding third party software and materials, is made available under this Software Copyright License.
8
+
9
+ 2. COPYRIGHT LICENSE
10
+
11
+ Internal use of RIPE, in source and binary forms, with or without modification, is permitted without payment of copyright license fees for non-commercial purposes of evaluation, testing and academic research.
12
+
13
+ No right or license, express or implied, is granted to any part of RIPE except and solely to the extent as expressly set forth herein. Any commercial use or exploitation of RIPE and/or any modifications thereto under this license are prohibited.
14
+
15
+ For any other use of RIPE than permitted by this software copyright license You need another license from Fraunhofer. In such case please contact Fraunhofer under the CONTACT INFORMATION below.
16
+
17
+ 3. LIMITED PATENT LICENSE
18
+
19
+ If Fraunhofer patents are implemented by RIPE their use is permitted for internal non-commercial purposes of evaluation, testing and academic research. No patent grant is provided for any other use, including but not limited to commercial use or exploitation.
20
+
21
+ Fraunhofer provides no warranty of patent non-infringement with respect to RIPE.
22
+
23
+ 4. PLACE OF JURISDICTION
24
+
25
+ German law shall apply to all disputes arising from the use of the licensed software. A court in Munich shall have local jurisdiction.
26
+
27
+ 5. DISCLAIMER
28
+
29
+ RIPE is provided by Fraunhofer "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties of fitness for a particular purpose. IN NO EVENT SHALL FRAUNHOFER BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages, including but not limited to procurement of substitute goods or services; loss of use, data, or profits, or business interruption, however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence), arising in any way out of the use of the Fraunhofer Software, even if advised of the possibility of such damage.
30
+
31
+ 6. CONTACT INFORMATION
32
+
33
+ Fraunhofer-Institut für Nachrichtentechnik, Heinrich-Hertz-Institut, HHI
34
+ Einsteinufer 37, 10587 Berlin, Germany
35
imcui/third_party/RIPE/LICENSE_DALF_DISK ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
imcui/third_party/RIPE/README.md ADDED
@@ -0,0 +1,367 @@
1
+ #
2
+ <p align="center">
3
+ <h1 align="center"> <ins>RIPE</ins>:<br> Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction <br><br>🌊🌺 ICCV 2025 🌺🌊</h1>
4
+ <p align="center">
5
+ <a href="https://scholar.google.com/citations?user=ybMR38kAAAAJ">Johannes Künzel</a>
6
+ ·
7
+ <a href="https://scholar.google.com/citations?user=5yTuyGIAAAAJ">Anna Hilsmann</a>
8
+ ·
9
+ <a href="https://scholar.google.com/citations?user=BCElyCkAAAAJ">Peter Eisert</a>
10
+ </p>
11
+ <h2 align="center"><p>
12
+ <a href="https://arxiv.org/abs/2507.04839" align="center">Arxiv</a> |
13
+ <a href="https://fraunhoferhhi.github.io/RIPE/" align="center">Project Page</a> |
14
+ <a href="https://huggingface.co/spaces/JohannesK14/RIPE" align="center">🤗Demo🤗</a>
15
+ </p></h2>
16
+ <div align="center"></div>
17
+ </p>
18
+ <br/>
19
+ <p align="center">
20
+ <img src="assets/teaser_image.png" alt="example" width=80%>
21
+ <br>
22
+ <em>RIPE demonstrates that keypoint detection and description can be learned from image pairs only - no depth, no pose, no artificial augmentation required.</em>
23
+ </p>
24
+
25
+ ## Setup
26
+
27
+ 💡**Alternative**💡 Install nothing locally and try our Hugging Face demo: [🤗Demo🤗](https://huggingface.co/spaces/JohannesK14/RIPE)
28
+
29
+ 1. Install mamba by following the instructions given here: [Mamba Installation](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html)
30
+
31
+ 2. Create a new environment with:
32
+ ```bash
33
+ mamba create -f conda_env.yml
34
+ mamba activate ripe-env
35
+ ```
36
+
37
+ ## How to use
38
+
39
+ Or just check [demo.py](demo.py)
40
+
41
+ ```python
42
+ import cv2
43
+ import kornia.feature as KF
44
+ import kornia.geometry as KG
45
+ import matplotlib.pyplot as plt
46
+ import numpy as np
47
+ import torch
48
+ from torchvision.io import decode_image
49
+
50
+ from ripe import vgg_hyper
51
+ from ripe.utils.utils import cv2_matches_from_kornia, resize_image, to_cv_kpts
52
+
53
+ dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
54
+
55
+ model = vgg_hyper().to(dev)
56
+ model.eval()
57
+
58
+ image1 = resize_image(decode_image("assets/all_souls_000013.jpg").float().to(dev) / 255.0)
59
+ image2 = resize_image(decode_image("assets/all_souls_000055.jpg").float().to(dev) / 255.0)
60
+
61
+ kpts_1, desc_1, score_1 = model.detectAndCompute(image1, threshold=0.5, top_k=2048)
62
+ kpts_2, desc_2, score_2 = model.detectAndCompute(image2, threshold=0.5, top_k=2048)
63
+
64
+ matcher = KF.DescriptorMatcher("mnn") # threshold is not used with mnn
65
+ match_dists, match_idxs = matcher(desc_1, desc_2)
66
+
67
+ matched_pts_1 = kpts_1[match_idxs[:, 0]]
68
+ matched_pts_2 = kpts_2[match_idxs[:, 1]]
69
+
70
+ H, mask = KG.ransac.RANSAC(model_type="fundamental", inl_th=1.0)(matched_pts_1, matched_pts_2)
71
+ matchesMask = mask.int().ravel().tolist()
72
+
73
+ result_ransac = cv2.drawMatches(
74
+ (image1.cpu().permute(1, 2, 0).numpy() * 255.0).astype(np.uint8),
75
+ to_cv_kpts(kpts_1, score_1),
76
+ (image2.cpu().permute(1, 2, 0).numpy() * 255.0).astype(np.uint8),
77
+ to_cv_kpts(kpts_2, score_2),
78
+ cv2_matches_from_kornia(match_dists, match_idxs),
79
+ None,
80
+ matchColor=(0, 255, 0),
81
+ matchesMask=matchesMask,
82
+ # matchesMask=None, # without RANSAC filtering
83
+ singlePointColor=(0, 0, 255),
84
+ flags=cv2.DrawMatchesFlags_DEFAULT,
85
+ )
86
+
87
+ plt.imshow(result_ransac)
88
+ plt.axis("off")
89
+ plt.tight_layout()
90
+
91
+ plt.show()
92
+ # plt.savefig("result_ransac.png")
93
+ ```
94
+
95
+ ## Reproduce the results
96
+
97
+ ### MegaDepth 1500 & HPatches
98
+
99
+ 1. Download and install [Glue Factory](https://github.com/cvg/glue-factory)
100
+ 2. Add this repo as a submodule to Glue Factory:
101
+ ```bash
102
+ cd glue-factory
103
+ git submodule add https://github.com/fraunhoferhhi/RIPE.git thirdparty/ripe
104
+ ```
105
+ 3. Create the new file ripe.py under gluefactory/models/extractors/ with the following content:
106
+
107
+ <details>
108
+ <summary>ripe.py</summary>
109
+
110
+ ```python
111
+ import sys
112
+ from pathlib import Path
113
+
114
+ import torch
115
+ import torchvision.transforms as transforms
116
+
117
+ from ..base_model import BaseModel
118
+
119
+ ripe_path = Path(__file__).parent / "../../../thirdparty/ripe"
120
+
121
+ print(f"RIPE Path: {ripe_path.resolve()}")
122
+ # check if the path exists
123
+ if not ripe_path.exists():
124
+ raise RuntimeError(f"RIPE path not found: {ripe_path}")
125
+
126
+ sys.path.append(str(ripe_path))
127
+
128
+ from ripe import vgg_hyper
129
+
130
+
131
+ class RIPE(BaseModel):
132
+ default_conf = {
133
+ "name": "RIPE",
134
+ "model_path": None,
135
+ "chunk": 4,
136
+ "dense_outputs": False,
137
+ "threshold": 1.0,
138
+ "top_k": 2048,
139
+ }
140
+
141
+ required_data_keys = ["image"]
142
+
143
+ # Initialize the line matcher
144
+ def _init(self, conf):
145
+ self.normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
146
+ self.model = vgg_hyper(model_path=conf.model_path)
147
+ self.model.eval()
148
+
149
+ self.set_initialized()
150
+
151
+ def _forward(self, data):
152
+ image = data["image"]
153
+
154
+ keypoints, scores, descriptors = [], [], []
155
+
156
+ chunk = self.conf.chunk
157
+
158
+ for i in range(0, image.shape[0], chunk):
159
+ if self.conf.dense_outputs:
160
+ raise NotImplementedError("Dense outputs are not supported")
161
+ else:
162
+ im = image[: min(image.shape[0], i + chunk)]
163
+ im = self.normalizer(im)
164
+
165
+ H, W = im.shape[-2:]
166
+
167
+ kpt, desc, score = self.model.detectAndCompute(
168
+ im,
169
+ threshold=self.conf.threshold,
170
+ top_k=self.conf.top_k,
171
+ )
172
+ keypoints += [kpt.squeeze(0)]
173
+ scores += [score.squeeze(0)]
174
+ descriptors += [desc.squeeze(0)]
175
+
176
+ del kpt
177
+ del desc
178
+ del score
179
+
180
+ keypoints = torch.stack(keypoints, 0)
181
+ scores = torch.stack(scores, 0)
182
+ descriptors = torch.stack(descriptors, 0)
183
+
184
+ pred = {
185
+ # "keypoints": keypoints.to(image) + 0.5,
186
+ "keypoints": keypoints.to(image),
187
+ "keypoint_scores": scores.to(image),
188
+ "descriptors": descriptors.to(image),
189
+ }
190
+
191
+ return pred
192
+
193
+ def loss(self, pred, data):
194
+ raise NotImplementedError
195
+ ```
196
+
197
+ </details>
198
+
199
+ 4. Create ripe+NN.yaml in gluefactory/configs with the following content:
200
+
201
+ <details>
202
+ <summary>ripe+NN.yaml</summary>
203
+
204
+ ```yaml
205
+ model:
206
+ name: two_view_pipeline
207
+ extractor:
208
+ name: extractors.ripe
209
+ threshold: 1.0
210
+ top_k: 2048
211
+ matcher:
212
+ name: matchers.nearest_neighbor_matcher
213
+ benchmarks:
214
+ megadepth1500:
215
+ data:
216
+ preprocessing:
217
+ side: long
218
+ resize: 1600
219
+ eval:
220
+ estimator: poselib
221
+ ransac_th: 0.5
222
+ hpatches:
223
+ eval:
224
+ estimator: poselib
225
+ ransac_th: 0.5
226
+ model:
227
+ extractor:
228
+ top_k: 1024 # overwrite config above
229
+ ```
230
+
231
+ 5. Run the MegaDepth 1500 evaluation script:
232
+
233
+ ```bash
234
+ python -m gluefactory.eval.megadepth1500 --conf ripe+NN # for MegaDepth 1500
235
+ ```
236
+
237
+ Should result in:
238
+
239
+ ```bash
240
+ 'rel_pose_error@10°': 0.6834,
241
+ 'rel_pose_error@20°': 0.7803,
242
+ 'rel_pose_error@5°': 0.5511,
243
+ ```
244
+
245
+ 6. Run the HPatches evaluation script:
246
+
247
+ ```bash
248
+ python -m gluefactory.eval.hpatches --conf ripe+NN # for HPatches
249
+ ```
250
+
251
+ Should result in:
252
+
253
+ ```bash
254
+ 'H_error_ransac@1px': 0.3793,
255
+ 'H_error_ransac@3px': 0.5893,
256
+ 'H_error_ransac@5px': 0.692,
257
+ ```
258
+
259
+
260
+
261
+ ## Training
262
+
263
+ 1. Create a .env file with the following content:
264
+ ```bash
265
+ OUTPUT_DIR="/output"
266
+ DATA_DIR="/data"
267
+ ```
268
+
269
+ 2. Download the required datasets:
270
+
271
+ <details>
272
+ <summary>DISK Megadepth subset</summary>
273
+
274
+ To download the dataset used by [DISK](https://github.com/cvlab-epfl/disk) execute the following commands:
275
+
276
+ ```bash
277
+ cd data
278
+ bash download_disk_data.sh
279
+ ```
280
+
281
+ </details>
282
+
283
+ <details>
284
+ <summary>Tokyo 24/7</summary>
285
+
286
+ - ⚠️**Optional**⚠️: Only if you are interest in the model used in Section 4.6 of the paper!
287
+ - Download the Tokyo 24/7 query images from here: [Tokyo 24/7 Query Images V3](http://www.ok.ctrl.titech.ac.jp/~torii/project/247/download/247query_v3.zip) from the official [website](http://www.ok.ctrl.titech.ac.jp/~torii/project/247/_).
288
+ - extract them into data/Tolyo_Query_V3
289
+
290
+ ```bash
291
+ Tokyo_Query_V3/
292
+ ├── 00001.csv
293
+ ├── 00001.jpg
294
+ ├── 00002.csv
295
+ ├── 00002.jpg
296
+ ├── ...
297
+ ├── 01125.csv
298
+ ├── 01125.jpg
299
+ ├── Readme.txt
300
+ └── Readme.txt~
301
+ ```
302
+
303
+ </details>
304
+
305
+ <details>
306
+ <summary>ACDC</summary>
307
+
308
+ - ⚠️**Optional**⚠️: Only if you are interest in the model used in Section 6.1 (supplementary) of the paper!
309
+ - Download the RGB images from here: [ACDC RGB Images](https://acdc.vision.ee.ethz.ch/rgb_anon_trainvaltest.zip)
310
+ - extract them into data/ACDC
311
+
312
+ ```bash
313
+ ACDC/
314
+ rgb_anon
315
+ ├── fog
316
+ │   ├── test
317
+ │   │   ├── GOPR0475
318
+ │   │   ├── GOPR0477
319
+ │   ├── test_ref
320
+ │   │   ├── GOPR0475
321
+ │   │   ├── GOPR0477
322
+ │   ├── train
323
+ │   │   ├── GOPR0475
324
+ │   │   ├── GOPR0476
325
+ ├── night
326
+ ```
327
+
328
+ </details>
329
+
330
+ 3. Run the training script:
331
+
332
+ ```bash
333
+ python ripe/train.py --config-name train project_name=train name=reproduce wandb_mode=offline
334
+ ```
335
+
336
+ You can also easily switch setting from the command line, e.g. to addionally train on the Tokyo 24/7 dataset:
337
+ ```bash
338
+ python ripe/train.py --config-name train project_name=train name=reproduce wandb_mode=offline data=megadepth+tokyo
339
+ ```
340
+
341
+ ## Acknowledgements
342
+
343
+ Our code is partly based on the following repositories:
344
+ - [DALF](https://github.com/verlab/DALF_CVPR_2023) Apache License 2.0
345
+ - [DeDoDe](https://github.com/Parskatt/DeDoDe) MIT License
346
+ - [DISK](https://github.com/cvlab-epfl/disk) Apache License 2.0
347
+
348
+ Our evaluation was based on the following repositories:
349
+ - [Glue Factory](https://github.com/cvg/glue-factory)
350
+ - [hloc](https://github.com/cvg/Hierarchical-Localization)
351
+
352
+ We would like to thank the authors of these repositories for their great work and for making their code available.
353
+
354
+ Our project webpage is based on the [Acadamic Project Page Template](https://github.com/eliahuhorwitz/Academic-project-page-template) by Eliahu Horwitz.
355
+
356
+ ## BibTex Citation
357
+
358
+ ```
359
+
360
+ @article{ripe2025,
361
+ year = {2025},
362
+ title = {{RIPE: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction}},
363
+ author = {Künzel, Johannes and Hilsmann, Anna and Eisert, Peter},
364
+ journal = {arXiv},
365
+ eprint = {2507.04839},
366
+ }
367
+ ```
imcui/third_party/RIPE/app.py ADDED
@@ -0,0 +1,272 @@
1
+ # This is a small gradio interface to access our RIPE keypoint extractor.
2
+ # You can either upload two images or use one of the example image pairs.
3
+
4
+ import os
5
+
6
+ import gradio as gr
7
+ from PIL import Image
8
+
9
+ from ripe import vgg_hyper
10
+
11
+ SEED = 32000
12
+ os.environ["PYTHONHASHSEED"] = str(SEED)
13
+
14
+ import random
15
+ from pathlib import Path
16
+
17
+ import numpy as np
18
+ import torch
19
+
20
+ torch.manual_seed(SEED)
21
+ np.random.seed(SEED)
22
+ random.seed(SEED)
23
+ import cv2
24
+ import kornia.feature as KF
25
+ import kornia.geometry as KG
26
+
27
+ from ripe.utils.utils import cv2_matches_from_kornia, to_cv_kpts
28
+
29
+ MIN_SIZE = 512
30
+ MAX_SIZE = 768
31
+
32
+ description_text = """
33
+ <p align='center'>
34
+ <h1 align='center'>🌊🌺 ICCV 2025 🌺🌊</h1>
35
+ <p align='center'>
36
+ <a href='https://scholar.google.com/citations?user=ybMR38kAAAAJ'>Johannes Künzel</a> ·
37
+ <a href='https://scholar.google.com/citations?user=5yTuyGIAAAAJ'>Anna Hilsmann</a> ·
38
+ <a href='https://scholar.google.com/citations?user=BCElyCkAAAAJ'>Peter Eisert</a>
39
+ </p>
40
+ <h2 align='center'>
41
+ <a href='???'>Arxiv</a> |
42
+ <a href='???'>Project Page</a> |
43
+ <a href='???'>Code</a>
44
+ </h2>
45
+ </p>
46
+
47
+ <br/>
48
+ <div align='center'>
49
+
50
+ ### This demo showcases our new keypoint extractor model, RIPE (Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction).
51
+
52
+ ### RIPE is trained without requiring pose or depth supervision or artificial augmentations. By leveraging reinforcement learning, it learns to extract keypoints solely based on whether an image pair depicts the same scene or not.
53
+
54
+ ### For more detailed information, please refer to our [paper](link to be added).
55
+
56
+ The demo code extracts the 2048-top keypoints from the two input images. It uses the mutual nearest neighbor (MNN) descriptor matcher from kornia to find matches between the two images.
57
+ If the number of matches is greater than 8, it applies RANSAC to filter out outliers based on the inlier threshold provided by the user.
58
+ Images are resized to fit within a maximum size of 2048x2048 pixels with maintained aspect ratio.
59
+
60
+ </div>
61
+ """
62
+
63
+ path_weights = Path(
64
+ "/media/jwkuenzel/work/projects/CVG_Reinforced_Keypoints/output/train/ablation_iccv/inlier_threshold/1571243/2025-02-19/14-00-10_789013/model_inlier_threshold_best.pth"
65
+ )
66
+
67
+ model = vgg_hyper(path_weights)
68
+
69
+
70
+ def get_new_image_size(image, min_size=1600, max_size=2048):
71
+ """
72
+ Get a new size for the image that is scaled to fit between min_size and max_size while maintaining the aspect ratio.
73
+
74
+ Args:
75
+ image (PIL.Image): Input image.
76
+ min_size (int): Minimum allowed size for width and height.
77
+ max_size (int): Maximum allowed size for width and height.
78
+
79
+ Returns:
80
+ tuple: New size (width, height) for the image.
81
+ """
82
+ width, height = image.size
83
+
84
+ aspect_ratio = width / height
85
+ if width > height:
86
+ new_width = max(min_size, min(max_size, width))
87
+ new_height = int(new_width / aspect_ratio)
88
+ else:
89
+ new_height = max(min_size, min(max_size, height))
90
+ new_width = int(new_height * aspect_ratio)
91
+
92
+ new_size = (new_width, new_height)
93
+
94
+ return new_size
95
+
96
+
97
+ def extract_keypoints(image1, image2, inl_th):
98
+ """
99
+ Extract keypoints from two input images using the RIPE model.
100
+
101
+ Args:
102
+ image1 (PIL.Image): First input image.
103
+ image2 (PIL.Image): Second input image.
104
+ inl_th (float): RANSAC inlier threshold.
105
+
106
+ Returns:
107
+ dict: A dictionary containing keypoints and matches.
108
+ """
109
+ log_text = "Extracting keypoints and matches with RIPE\n"
110
+
111
+ log_text += f"Image 1 size: {image1.size}\n"
112
+ log_text += f"Image 2 size: {image2.size}\n"
113
+
114
+ # check not larger than 2048x2048
115
+ new_size = get_new_image_size(image1, min_size=MIN_SIZE, max_size=MAX_SIZE)
116
+ image1 = image1.resize(new_size)
117
+
118
+ new_size = get_new_image_size(image2, min_size=MIN_SIZE, max_size=MAX_SIZE)
119
+ image2 = image2.resize(new_size)
120
+
121
+ log_text += f"Resized Image 1 size: {image1.size}\n"
122
+ log_text += f"Resized Image 2 size: {image2.size}\n"
123
+
124
+ dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
125
+ model.to(dev)
126
+
127
+ image1 = image1.convert("RGB")
128
+ image2 = image2.convert("RGB")
129
+
130
+ image1_original = image1.copy()
131
+ image2_original = image2.copy()
132
+
133
+ # convert PIL images to numpy arrays
134
+ image1_original = np.array(image1_original)
135
+ image2_original = np.array(image2_original)
136
+
137
+ # convert PIL images to tensors
138
+ image1 = torch.tensor(np.array(image1)).permute(2, 0, 1).float() / 255.0
139
+ image2 = torch.tensor(np.array(image2)).permute(2, 0, 1).float() / 255.0
140
+
141
+ image1 = image1.to(dev).unsqueeze(0) # Add batch dimension
142
+ image2 = image2.to(dev).unsqueeze(0) # Add batch dimension
143
+
144
+ kpts_1, desc_1, score_1 = model.detectAndCompute(image1, threshold=0.5, top_k=2048)
145
+ kpts_2, desc_2, score_2 = model.detectAndCompute(image2, threshold=0.5, top_k=2048)
146
+
147
+ log_text += f"Number of keypoints in image 1: {kpts_1.shape[0]}\n"
148
+ log_text += f"Number of keypoints in image 2: {kpts_2.shape[0]}\n"
149
+
150
+ matcher = KF.DescriptorMatcher("mnn") # threshold is not used with mnn
151
+ match_dists, match_idxs = matcher(desc_1, desc_2)
152
+
153
+ log_text += f"Number of MNN matches: {match_idxs.shape[0]}\n"
154
+
155
+ cv2_matches = cv2_matches_from_kornia(match_dists, match_idxs)
156
+
157
+ do_ransac = match_idxs.shape[0] > 8
158
+
159
+ if do_ransac:
160
+ matched_pts_1 = kpts_1[match_idxs[:, 0]]
161
+ matched_pts_2 = kpts_2[match_idxs[:, 1]]
162
+
163
+ H, mask = KG.ransac.RANSAC(model_type="fundamental", inl_th=inl_th)(matched_pts_1, matched_pts_2)
164
+ matchesMask = mask.int().ravel().tolist()
165
+
166
+ log_text += f"RANSAC found {mask.sum().item()} inliers out of {mask.shape[0]} matches with an inlier threshold of {inl_th}.\n"
167
+ else:
168
+ log_text += "Not enough matches for RANSAC, skipping RANSAC step.\n"
169
+
170
+ kpts_1 = to_cv_kpts(kpts_1, score_1)
171
+ kpts_2 = to_cv_kpts(kpts_2, score_2)
172
+
173
+ keypoints_raw_1 = cv2.drawKeypoints(image1_original, kpts_1, image1_original, color=(0, 255, 0))
174
+ keypoints_raw_2 = cv2.drawKeypoints(image2_original, kpts_2, image2_original, color=(0, 255, 0))
175
+
176
+ # pad height smaller image to match the height of the larger image
177
+ if keypoints_raw_1.shape[0] < keypoints_raw_2.shape[0]:
178
+ pad_height = keypoints_raw_2.shape[0] - keypoints_raw_1.shape[0]
179
+ keypoints_raw_1 = np.pad(
180
+ keypoints_raw_1, ((0, pad_height), (0, 0), (0, 0)), mode="constant", constant_values=255
181
+ )
182
+ elif keypoints_raw_1.shape[0] > keypoints_raw_2.shape[0]:
183
+ pad_height = keypoints_raw_1.shape[0] - keypoints_raw_2.shape[0]
184
+ keypoints_raw_2 = np.pad(
185
+ keypoints_raw_2, ((0, pad_height), (0, 0), (0, 0)), mode="constant", constant_values=255
186
+ )
187
+
188
+ # concatenate keypoints images horizontally
189
+ keypoints_raw = np.concatenate((keypoints_raw_1, keypoints_raw_2), axis=1)
190
+ keypoints_raw_pil = Image.fromarray(keypoints_raw)
191
+
192
+ result_raw = cv2.drawMatches(
193
+ image1_original,
194
+ kpts_1,
195
+ image2_original,
196
+ kpts_2,
197
+ cv2_matches,
198
+ None,
199
+ matchColor=(0, 255, 0),
200
+ matchesMask=None,
201
+ # matchesMask=None,
202
+ flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
203
+ )
204
+
205
+ if not do_ransac:
206
+ result_ransac = None
207
+ else:
208
+ result_ransac = cv2.drawMatches(
209
+ image1_original,
210
+ kpts_1,
211
+ image2_original,
212
+ kpts_2,
213
+ cv2_matches,
214
+ None,
215
+ matchColor=(0, 255, 0),
216
+ matchesMask=matchesMask,
217
+ singlePointColor=(0, 0, 255),
218
+ flags=cv2.DrawMatchesFlags_DEFAULT,
219
+ )
220
+
221
+ # result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB) # Convert BGR to RGB for display
222
+
223
+ # convert to PIL Image
224
+ result_raw_pil = Image.fromarray(result_raw)
225
+ if result_ransac is not None:
226
+ result_ransac_pil = Image.fromarray(result_ransac)
227
+ else:
228
+ result_ransac_pil = None
229
+
230
+ return log_text, result_ransac_pil, result_raw_pil, keypoints_raw_pil
231
+
232
+
233
+ demo = gr.Interface(
234
+ fn=extract_keypoints,
235
+ inputs=[
236
+ gr.Image(type="pil", label="Image 1"),
237
+ gr.Image(type="pil", label="Image 2"),
238
+ gr.Slider(
239
+ minimum=0.1,
240
+ maximum=3.0,
241
+ step=0.1,
242
+ value=0.5,
243
+ label="RANSAC inlier threshold",
244
+ info="Threshold for RANSAC inlier detection. Lower values may yield fewer inliers but more robust matches.",
245
+ ),
246
+ ],
247
+ outputs=[
248
+ gr.Textbox(type="text", label="Log"),
249
+ gr.Image(type="pil", label="Keypoints and Matches (RANSAC)"),
250
+ gr.Image(type="pil", label="Keypoints and Matches"),
251
+ gr.Image(type="pil", label="Keypoint Detection Results"),
252
+ ],
253
+ title="RIPE: Reinforcement Learning on Unlabeled Image Pairs for Robust Keypoint Extraction",
254
+ description=description_text,
255
+ examples=[
256
+ [
257
+ "assets_gradio/all_souls_000013.jpg",
258
+ "assets_gradio/all_souls_000055.jpg",
259
+ ],
260
+ [
261
+ "assets_gradio/167170681_0e5c42fd21_o.jpg",
262
+ "assets_gradio/170804731_6bf4fbecd4_o.jpg",
263
+ ],
264
+ [
265
+ "assets_gradio/4171014767_0fe879b783_o.jpg",
266
+ "assets_gradio/4174108353_20422632d6_o.jpg",
267
+ ],
268
+ ],
269
+ flagging_mode="never",
270
+ theme="default",
271
+ )
272
+ demo.launch()
imcui/third_party/RIPE/assets/all_souls_000013.jpg ADDED

Git LFS Details

  • SHA256: 60fd73963102f86baf08325631f8912db34acba7fb46cc9a41b818099276187e
  • Pointer size: 131 Bytes
  • Size of remote file: 440 kB
imcui/third_party/RIPE/assets/all_souls_000055.jpg ADDED

Git LFS Details

  • SHA256: e11c06ae78103c2dbb90737e2bab6aa47f2000948ece5bfe9a1e7eb1cacac53a
  • Pointer size: 131 Bytes
  • Size of remote file: 368 kB
imcui/third_party/RIPE/assets/teaser_image.png ADDED

Git LFS Details

  • SHA256: bd636ae0eb42927792cba0f04243c2ec65226a6f5e1287ab4ee015353b01c208
  • Pointer size: 132 Bytes
  • Size of remote file: 1.69 MB
imcui/third_party/RIPE/conda_env.yml ADDED
@@ -0,0 +1,26 @@
+name: ripe-env
+channels:
+  - conda-forge
+dependencies:
+  - python
+  - cmake
+  - eigen # for poselib
+  - pytorch=2.6=*cuda*
+  - torchvision
+  - pip
+  # others
+  - pudb # debugger
+  - pip:
+    - lightning>=2.0.0
+    - setuptools
+    - poselib @ git+https://github.com/PoseLib/PoseLib.git@56d158f744d3561b0b70174e6d8ca9a7fc9bd9c1
+    - hydra-core
+    - opencv-python
+    - torchmetrics
+    - pyrootutils # standardizing the project root setup
+    - rich
+    - matplotlib
+    - kornia
+    - numpy
+    - wandb
+    - h5py
imcui/third_party/RIPE/conf/backbones/resnet.yaml ADDED
@@ -0,0 +1,6 @@
+_target_: ripe.models.backbones.resnet.ResNet
+nchannels: 3
+pretrained: True
+use_instance_norm: False
+mode: dect
+num_layers: 4
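The conf files in this tree follow Hydra's `_target_` convention: each node names the class to instantiate and the remaining keys become constructor arguments. A minimal sketch of how such a file could be turned into an object outside the training entry point (assuming the `ripe` package is importable and Hydra/OmegaConf are installed):

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# load the backbone config shown above and build the object it describes;
# nested _target_ nodes (e.g. in the data configs) are instantiated recursively
cfg = OmegaConf.load("conf/backbones/resnet.yaml")
backbone = instantiate(cfg)
```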
imcui/third_party/RIPE/conf/backbones/vgg.yaml ADDED
@@ -0,0 +1,5 @@
+_target_: ripe.models.backbones.vgg.VGG
+nchannels: 3
+pretrained: True
+use_instance_norm: False
+mode: dect
imcui/third_party/RIPE/conf/data/disk_megadepth.yaml ADDED
@@ -0,0 +1,12 @@
+_target_: ripe.data.datasets.disk_megadepth.DISK_Megadepth
+root: ${oc.env:DATA_DIR}/disk-data
+stage: train
+max_scene_size: 10000
+transforms:
+  _target_: ripe.data.data_transforms.Compose
+  transforms:
+    - _target_: ripe.data.data_transforms.Normalize
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+    - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+      target_size_longer_side: 560
imcui/third_party/RIPE/conf/data/megadepth+acdc.yaml ADDED
@@ -0,0 +1,33 @@
+_target_: ripe.data.datasets.dataset_combinator.DatasetCombinator
+mode: custom
+weights:
+  - 0.2
+  - 0.8
+datasets:
+  - _target_: ripe.data.datasets.acdc.ACDC
+    root: ${oc.env:DATA_DIR}/ACDC
+    stage: train
+    condition: all
+    transforms:
+      _target_: ripe.data.data_transforms.Compose
+      transforms:
+        - _target_: ripe.data.data_transforms.Normalize
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+        - _target_: ripe.data.data_transforms.Crop # to remove the car hood from some images
+          crop_height: 896
+          crop_width: 1920
+        - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+          target_size_longer_side: 560
+  - _target_: ripe.data.datasets.disk_megadepth.DISK_Megadepth
+    root: ${oc.env:DATA_DIR}/disk-data
+    stage: train
+    max_scene_size: 10000
+    transforms:
+      _target_: ripe.data.data_transforms.Compose
+      transforms:
+        - _target_: ripe.data.data_transforms.Normalize
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+        - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+          target_size_longer_side: 560
imcui/third_party/RIPE/conf/data/megadepth+tokyo.yaml ADDED
@@ -0,0 +1,29 @@
+_target_: ripe.data.datasets.dataset_combinator.DatasetCombinator
+mode: custom
+weights:
+  - 0.2
+  - 0.8
+datasets:
+  - _target_: ripe.data.datasets.tokyo_query_v3.TokyoQueryV3
+    root: ${oc.env:DATA_DIR}/Tokyo_Query_V3
+    stage: train
+    transforms:
+      _target_: ripe.data.data_transforms.Compose
+      transforms:
+        - _target_: ripe.data.data_transforms.Normalize
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+        - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+          target_size_longer_side: 560 # like DeDoDe
+  - _target_: ripe.data.datasets.disk_megadepth.DISK_Megadepth
+    root: ${oc.env:DATA_DIR}/disk-data
+    stage: train
+    max_scene_size: 10000
+    transforms:
+      _target_: ripe.data.data_transforms.Compose
+      transforms:
+        - _target_: ripe.data.data_transforms.Normalize
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+        - _target_: ripe.data.data_transforms.ResizeAndPadWithHomography
+          target_size_longer_side: 560
imcui/third_party/RIPE/conf/descriptor_loss/contrastive_loss.yaml ADDED
@@ -0,0 +1,3 @@
+_target_: ripe.losses.contrastive_loss.ContrastiveLoss
+pos_margin: 0.2
+neg_margin: 0.2
imcui/third_party/RIPE/conf/inl_th/constant.yaml ADDED
@@ -0,0 +1,2 @@
+_target_: ripe.scheduler.constant.ConstantScheduler
+value: 1.0
imcui/third_party/RIPE/conf/inl_th/exp_decay.yaml ADDED
@@ -0,0 +1,4 @@
+_target_: ripe.scheduler.expDecay.ExpDecay
+a: 2.5
+b: 0.0005
+c: 0.5
imcui/third_party/RIPE/conf/matcher/concurrent_mnn_poselib.yaml ADDED
@@ -0,0 +1,8 @@
+_target_: ripe.matcher.concurrent_matcher.ConcurrentMatcher
+min_num_matches: 8
+matcher:
+  _target_: kornia.feature.DescriptorMatcher
+  match_mode: "mnn"
+  th: 0.8
+robust_estimator:
+  _target_: ripe.matcher.pose_estimator_poselib.PoseLibRelativePoseEstimator
imcui/third_party/RIPE/conf/train.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - data: disk_megadepth # megadepth+acdc or megadepth+tokyo
3
+ - backbones: vgg
4
+ - upsampler: hypercolumn_features # interpolate_sparse2D
5
+ - matcher: concurrent_mnn_poselib
6
+ - descriptor_loss: contrastive_loss # none to deactivate
7
+ - inl_th: constant # exp_decay
8
+ - _self_
9
+
10
+ project_name: ???
11
+ name: ???
12
+
13
+ hydra:
14
+ run:
15
+ dir: ${oc.env:OUTPUT_DIR}/${project_name}/${name}/${oc.env:SLURM_JOB_ID}/${now:%Y-%m-%d}/${now:%H-%M-%S}
16
+ output_dir: ${hydra:runtime.output_dir}
17
+
18
+ num_gpus: 1
19
+ # precision: "32-true"
20
+ precision: "bf16-mixed" # numerically more stable
21
+ # precision: "16-mixed"
22
+
23
+ log_interval: 50 # log every N steps/ batches
24
+ wandb_mode: online
25
+ val_interval: 2000
26
+ conf_inference:
27
+ threshold: 0.5
28
+ top_k: 2048
29
+
30
+ desc_loss_weight: 5.0 # 0.0 to deactivate, also deactivates 1x1 conv
31
+
32
+ num_workers: 8
33
+ batch_size: 6
34
+
35
+ transformation_model: fundamental
36
+
37
+ network:
38
+ _target_: ripe.models.ripe.RIPE
39
+ _partial_: true
40
+ window_size: 8
41
+ non_linearity_dect:
42
+ _target_: torch.nn.Identity
43
+ # _target_: torch.nn.ReLU
44
+ desc_shares:
45
+ null
46
+ # - 64
47
+ # - 64
48
+ # - 64
49
+ # - 64
50
+
51
+ lr: 0.001 # 0.001 makes it somewhat unstable
52
+ fp_penalty: -1e-7 # -1e-7
53
+ kp_penalty: -7e-7 # -7e-7
54
+ num_grad_accs: 4
55
+ reward_type: inlier # inlier_ratio , inlier+inlier_ratio
56
+ no_filtering_negatives: False
57
+ descriptor_dim: 256
58
+
59
+ lr_scheduler:
60
+ _partial_: true
61
+ _target_: ripe.scheduler.linearLR.StepLinearLR
62
+ num_steps: ${num_steps}
63
+ initial_lr: ${lr}
64
+ final_lr: 1e-6
65
+
66
+ use_whitening: false
67
+
68
+ selected_only: False
69
+
70
+ padding_filter_mode: ignore
71
+ # padding_filter_mode: punish
72
+
73
+ num_steps: 80000
74
+
75
+ alpha_scheduler: # 1.0 after 1/3 of the steps
76
+ _target_: ripe.scheduler.linear_with_plateaus.LinearWithPlateaus
77
+ start_val: 0.0
78
+ end_val: 1.0
79
+ steps_total: ${num_steps}
80
+ rel_length_start_plateau: 0.0
81
+ rel_length_end_plateu: 0.6666666
82
+
83
+ beta_scheduler: # linear increase over all steps
84
+ _target_: ripe.scheduler.linear_with_plateaus.LinearWithPlateaus
85
+ start_val: 0.0
86
+ end_val: 1.0
87
+ steps_total: ${num_steps}
88
+ rel_length_start_plateau: 0.0
89
+ rel_length_end_plateu: 0.0
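The defaults list above composes the data, backbone, upsampler, matcher, descriptor-loss and inlier-threshold groups into one Hydra config. The training entry point itself is not part of this excerpt, so the following is a hypothetical minimal sketch of how such a config is composed and how its `_target_` nodes can be instantiated. It assumes hydra-core >= 1.2, omegaconf, and the `ripe` package with its dependencies on the PYTHONPATH; `project_name`/`name` are mandatory and the run dir interpolates `OUTPUT_DIR` and `SLURM_JOB_ID`, so those must be supplied at launch.

```python
# Hypothetical sketch, not the repository's train script: compose conf/train.yaml
# with Hydra and build a few of its _target_ nodes via instantiate().
import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig


@hydra.main(config_path="conf", config_name="train", version_base=None)
def main(cfg: DictConfig) -> None:
    matcher = instantiate(cfg.matcher)            # ConcurrentMatcher (kornia MNN matcher + poselib estimator)
    desc_loss = instantiate(cfg.descriptor_loss)  # ContrastiveLoss(pos_margin=0.2, neg_margin=0.2)
    inl_th = instantiate(cfg.inl_th)              # inlier-threshold scheduler (constant or exp_decay)
    print(type(matcher).__name__, type(desc_loss).__name__, type(inl_th).__name__)


if __name__ == "__main__":
    # e.g.: OUTPUT_DIR=/tmp SLURM_JOB_ID=0 python sketch.py project_name=demo name=test
    main()
```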
imcui/third_party/RIPE/conf/upsampler/hypercolumn_features.yaml ADDED
@@ -0,0 +1,2 @@
1
+ _target_: ripe.models.upsampler.hypercolumn_features.HyperColumnFeatures
2
+ mode: bilinear
imcui/third_party/RIPE/conf/upsampler/interpolate_sparse2D.yaml ADDED
@@ -0,0 +1 @@
1
+ _target_: ripe.models.upsampler.interpolate_sparse2d.InterpolateSparse2d
imcui/third_party/RIPE/data/download_disk_data.sh ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # get the data (zipped)
4
+ # wget -r https://datasets.epfl.ch/disk-data/index.html
5
+
6
+ cd datasets.epfl.ch/disk-data;
7
+
8
+ # check for MD5 match
9
+ # md5sum -c md5sum.txt;
10
+ # if [ $? ]; then
11
+ # echo "MD5 mismatch (corrupt download)";
12
+ # return 1;
13
+ # fi
14
+
15
+ # create a crude progress counter
16
+ ITER=1;
17
+ TOTAL=138;
18
+ # unzip test scenes
19
+ cd imw2020-val/scenes;
20
+ for SCENE_TAR in *.tar.gz; do
21
+ echo "Unzipping $SCENE_TAR ($ITER / $TOTAL)";
22
+ tar -xz --strip-components=3 -f $SCENE_TAR;
23
+ rm $SCENE_TAR;
24
+ ITER=$(($ITER+1));
25
+ done
26
+
27
+ # unzip megadepth scenes
28
+ cd ../../megadepth/scenes;
29
+ for SCENE_TAR in *.tar; do
30
+ echo "Unzipping $SCENE_TAR ($ITER / $TOTAL)";
31
+ tar -x --strip-components=3 -f $SCENE_TAR;
32
+ rm $SCENE_TAR;
33
+ ITER=$(($ITER+1));
34
+ done
35
+
36
+ cd ../../../../
37
+
38
+ mv datasets.epfl.ch/disk-data ./
39
+ rm -rf datasets.epfl.ch
40
+
41
+
42
+
43
+
imcui/third_party/RIPE/demo.py ADDED
@@ -0,0 +1,51 @@
1
+ import cv2
2
+ import kornia.feature as KF
3
+ import kornia.geometry as KG
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ import torch
7
+ from torchvision.io import decode_image
8
+
9
+ from ripe import vgg_hyper
10
+ from ripe.utils.utils import cv2_matches_from_kornia, resize_image, to_cv_kpts
11
+
12
+ dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+
14
+ model = vgg_hyper().to(dev)
15
+ model.eval()
16
+
17
+ image1 = resize_image(decode_image("assets/all_souls_000013.jpg").float().to(dev) / 255.0)
18
+ image2 = resize_image(decode_image("assets/all_souls_000055.jpg").float().to(dev) / 255.0)
19
+
20
+ kpts_1, desc_1, score_1 = model.detectAndCompute(image1, threshold=0.5, top_k=2048)
21
+ kpts_2, desc_2, score_2 = model.detectAndCompute(image2, threshold=0.5, top_k=2048)
22
+
23
+ matcher = KF.DescriptorMatcher("mnn") # threshold is not used with mnn
24
+ match_dists, match_idxs = matcher(desc_1, desc_2)
25
+
26
+ matched_pts_1 = kpts_1[match_idxs[:, 0]]
27
+ matched_pts_2 = kpts_2[match_idxs[:, 1]]
28
+
29
+ H, mask = KG.ransac.RANSAC(model_type="fundamental", inl_th=1.0)(matched_pts_1, matched_pts_2)
30
+ matchesMask = mask.int().ravel().tolist()
31
+
32
+ result_ransac = cv2.drawMatches(
33
+ (image1.cpu().permute(1, 2, 0).numpy() * 255.0).astype(np.uint8),
34
+ to_cv_kpts(kpts_1, score_1),
35
+ (image2.cpu().permute(1, 2, 0).numpy() * 255.0).astype(np.uint8),
36
+ to_cv_kpts(kpts_2, score_2),
37
+ cv2_matches_from_kornia(match_dists, match_idxs),
38
+ None,
39
+ matchColor=(0, 255, 0),
40
+ matchesMask=matchesMask,
41
+ # matchesMask=None, # without RANSAC filtering
42
+ singlePointColor=(0, 0, 255),
43
+ flags=cv2.DrawMatchesFlags_DEFAULT,
44
+ )
45
+
46
+ plt.imshow(result_ransac)
47
+ plt.axis("off")
48
+ plt.tight_layout()
49
+
50
+ # plt.show()
51
+ plt.savefig("result_ransac.png")
imcui/third_party/RIPE/ripe/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .model_zoo import vgg_hyper # noqa: F401
imcui/third_party/RIPE/ripe/benchmarks/imw_2020.py ADDED
@@ -0,0 +1,320 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+ import cv2
5
+ import kornia.feature as KF
6
+ import matplotlib.pyplot as plt
7
+ import numpy as np
8
+ import poselib
9
+ import torch
10
+ from tqdm import tqdm
11
+
12
+ from ripe import utils
13
+ from ripe.data.data_transforms import Compose, Normalize, Resize
14
+ from ripe.data.datasets.disk_imw import DISK_IMW
15
+ from ripe.utils.pose_error import AUCMetric, relative_pose_error
16
+ from ripe.utils.utils import (
17
+ cv2_matches_from_kornia,
18
+ cv_resize_and_pad_to_shape,
19
+ to_cv_kpts,
20
+ )
21
+
22
+ log = utils.get_pylogger(__name__)
23
+
24
+
25
+ class IMW_2020_Benchmark:
26
+ def __init__(
27
+ self,
28
+ use_predefined_subset: bool = True,
29
+ conf_inference=None,
30
+ edge_input_divisible_by=None,
31
+ ):
32
+ data_dir = os.getenv("DATA_DIR")
33
+ if data_dir is None:
34
+ raise ValueError("Environment variable DATA_DIR is not set.")
35
+ root_path = Path(data_dir) / "disk-data"
36
+
37
+ self.data = DISK_IMW(
38
+ str(
39
+ root_path
40
+ ), # Resize only to ensure that the input size is divisible by the value of edge_input_divisible_by
41
+ transforms=Compose(
42
+ [
43
+ Resize(None, edge_input_divisible_by),
44
+ Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
45
+ ]
46
+ ),
47
+ )
48
+ self.ids_subset = None
49
+ self.results = []
50
+ self.conf_inference = conf_inference
51
+
52
+ # fmt: off
53
+ if use_predefined_subset:
54
+ self.ids_subset = [4921, 3561, 3143, 6040, 802, 6828, 5338, 9275, 10764, 10085, 5124, 11355, 7, 10027, 2161, 4433, 6887, 3311, 10766,
55
+ 11451, 11433, 8539, 2581, 10300, 10562, 1723, 8803, 6275, 10140, 11487, 6238, 638, 8092, 9979, 201, 10394, 3414,
56
+ 9002, 7456, 2431, 632, 6589, 9265, 9889, 3139, 7890, 10619, 4899, 675, 176, 4309, 4814, 3833, 3519, 148, 4560, 10705,
57
+ 3744, 1441, 4049, 1791, 5106, 575, 1540, 1105, 6791, 1383, 9344, 501, 2504, 4335, 8992, 10970, 10786, 10405, 9317,
58
+ 5279, 1396, 5044, 9408, 11125, 10417, 7627, 7480, 1358, 7738, 5461, 10178, 9226, 8106, 2766, 6216, 4032, 7298, 259,
59
+ 3021, 2645, 8756, 7513, 3163, 2510, 6701, 6684, 3159, 9689, 7425, 6066, 1904, 6382, 3052, 777, 6277, 7409, 5997, 2987,
60
+ 11316, 2894, 4528, 1927, 10366, 8605, 2726, 1886, 2416, 2164, 3352, 2997, 6636, 6765, 5609, 3679, 76, 10956, 3612, 6699,
61
+ 1741, 8811, 3755, 1285, 9520, 2476, 3977, 370, 9823, 1834, 7551, 6227, 7303, 6399, 4758, 10713, 5050, 380, 11056, 7620,
62
+ 4826, 6090, 9011, 7523, 7355, 8021, 9801, 1801, 6522, 7138, 10017, 8732, 6402, 3116, 4031, 6088, 3975, 9841, 9082, 9412,
63
+ 5406, 217, 2385, 8791, 8361, 494, 4319, 5275, 3274, 335, 6731, 207, 10095, 3068, 5996, 3951, 2808, 5877, 6134, 7772, 10042,
64
+ 8574, 5501, 10885, 7871]
65
+ # self.ids_subset = self.ids_subset[:10]
66
+ # fmt: on
67
+
68
+ def evaluate_sample(self, model, sample, dev):
69
+ img_1 = sample["src_image"].unsqueeze(0).to(dev)
70
+ img_2 = sample["trg_image"].unsqueeze(0).to(dev)
71
+
72
+ scale_h_1, scale_w_1 = (
73
+ sample["orig_size_src"][0] / img_1.shape[2],
74
+ sample["orig_size_src"][1] / img_1.shape[3],
75
+ )
76
+ scale_h_2, scale_w_2 = (
77
+ sample["orig_size_trg"][0] / img_2.shape[2],
78
+ sample["orig_size_trg"][1] / img_2.shape[3],
79
+ )
80
+
81
+ M = None
82
+ info = {}
83
+ kpts_1, desc_1, score_1 = None, None, None
84
+ kpts_2, desc_2, score_2 = None, None, None
85
+ match_dists, match_idxs = None, None
86
+
87
+ try:
88
+ kpts_1, desc_1, score_1 = model.detectAndCompute(img_1, **self.conf_inference)
89
+ kpts_2, desc_2, score_2 = model.detectAndCompute(img_2, **self.conf_inference)
90
+
91
+ if kpts_1.dim() == 3:
92
+ assert kpts_1.shape[0] == 1 and kpts_2.shape[0] == 1, "Batch size must be 1"
93
+
94
+ kpts_1, desc_1, score_1 = (
95
+ kpts_1.squeeze(0),
96
+ desc_1[0].squeeze(0),
97
+ score_1[0].squeeze(0),
98
+ )
99
+ kpts_2, desc_2, score_2 = (
100
+ kpts_2.squeeze(0),
101
+ desc_2[0].squeeze(0),
102
+ score_2[0].squeeze(0),
103
+ )
104
+
105
+ scale_1 = torch.tensor([scale_w_1, scale_h_1], dtype=torch.float).to(dev)
106
+ scale_2 = torch.tensor([scale_w_2, scale_h_2], dtype=torch.float).to(dev)
107
+
108
+ kpts_1 = kpts_1 * scale_1
109
+ kpts_2 = kpts_2 * scale_2
110
+
111
+ matcher = KF.DescriptorMatcher("mnn") # threshold is not used with mnn
112
+ match_dists, match_idxs = matcher(desc_1, desc_2)
113
+
114
+ matched_pts_1 = kpts_1[match_idxs[:, 0]]
115
+ matched_pts_2 = kpts_2[match_idxs[:, 1]]
116
+
117
+ camera_1 = sample["src_camera"]
118
+ camera_2 = sample["trg_camera"]
119
+
120
+ M, info = poselib.estimate_relative_pose(
121
+ matched_pts_1.cpu().numpy(),
122
+ matched_pts_2.cpu().numpy(),
123
+ camera_1.to_cameradict(),
124
+ camera_2.to_cameradict(),
125
+ {
126
+ "max_epipolar_error": 0.5,
127
+ },
128
+ {},
129
+ )
130
+ except RuntimeError as e:
131
+ if "No keypoints detected" in str(e):
132
+ pass
133
+ else:
134
+ raise e
135
+
136
+ success = M is not None
137
+ if success:
138
+ M = {
139
+ "R": torch.tensor(M.R, dtype=torch.float),
140
+ "t": torch.tensor(M.t, dtype=torch.float),
141
+ }
142
+ inl = info["inliers"]
143
+ else:
144
+ M = {
145
+ "R": torch.eye(3, dtype=torch.float),
146
+ "t": torch.zeros((3), dtype=torch.float),
147
+ }
148
+ inl = np.zeros((0,)).astype(bool)
149
+
150
+ t_err, r_err = relative_pose_error(sample["s2t_R"].cpu(), sample["s2t_T"].cpu(), M["R"], M["t"])
151
+
152
+ rel_pose_error = max(t_err.item(), r_err.item()) if success else np.inf
153
+ ransac_inl = np.sum(inl)
154
+ ransac_inl_ratio = np.mean(inl)
155
+
156
+ if success:
157
+ assert match_dists is not None and match_idxs is not None, "Matches must be computed"
158
+ cv_keypoints_src = to_cv_kpts(kpts_1, score_1)
159
+ cv_keypoints_trg = to_cv_kpts(kpts_2, score_2)
160
+ cv_matches = cv2_matches_from_kornia(match_dists, match_idxs)
161
+ cv_mask = [int(m) for m in inl]
162
+ else:
163
+ cv_keypoints_src, cv_keypoints_trg = [], []
164
+ cv_matches, cv_mask = [], []
165
+
166
+ estimation = {
167
+ "success": success,
168
+ "M_0to1": M,
169
+ "inliers": torch.tensor(inl).to(img_1),
170
+ "rel_pose_error": rel_pose_error,
171
+ "ransac_inl": ransac_inl,
172
+ "ransac_inl_ratio": ransac_inl_ratio,
173
+ "path_src_image": sample["src_path"],
174
+ "path_trg_image": sample["trg_path"],
175
+ "cv_keypoints_src": cv_keypoints_src,
176
+ "cv_keypoints_trg": cv_keypoints_trg,
177
+ "cv_matches": cv_matches,
178
+ "cv_mask": cv_mask,
179
+ }
180
+
181
+ return estimation
182
+
183
+ def evaluate(self, model, dev, progress_bar=False):
184
+ model.eval()
185
+
186
+ # reset results
187
+ self.results = []
188
+
189
+ for idx in tqdm(
190
+ self.ids_subset if self.ids_subset is not None else range(len(self.data)),
191
+ disable=not progress_bar,
192
+ ):
193
+ sample = self.data[idx]
194
+ self.results.append(self.evaluate_sample(model, sample, dev))
195
+
196
+ def get_auc(self, threshold=5, downsampled=False):
197
+ if len(self.results) == 0:
198
+ raise ValueError("No results to log. Run evaluate first.")
199
+
200
+ summary_results = self.calc_auc(downsampled=downsampled)
201
+
202
+ return summary_results[f"rel_pose_error@{threshold}°{'__original' if not downsampled else '__downsampled'}"]
203
+
204
+ def plot_results(self, num_samples=10, logger=None, step=None, downsampled=False):
205
+ if len(self.results) == 0:
206
+ raise ValueError("No results to plot. Run evaluate first.")
207
+
208
+ plot_data = []
209
+
210
+ for result in self.results[:num_samples]:
211
+ img1 = cv2.imread(result["path_src_image"])
212
+ img2 = cv2.imread(result["path_trg_image"])
213
+
214
+ # from BGR to RGB
215
+ img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
216
+ img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
217
+
218
+ plt_matches = cv2.drawMatches(
219
+ img1,
220
+ result["cv_keypoints_src"],
221
+ img2,
222
+ result["cv_keypoints_trg"],
223
+ result["cv_matches"],
224
+ None,
225
+ matchColor=None,
226
+ matchesMask=result["cv_mask"],
227
+ flags=cv2.DrawMatchesFlags_DEFAULT,
228
+ )
229
+ file_name = (
230
+ Path(result["path_src_image"]).parent.parent.name
231
+ + "_"
232
+ + Path(result["path_src_image"]).stem
233
+ + Path(result["path_trg_image"]).stem
234
+ + ("_downsampled" if downsampled else "")
235
+ + ".png"
236
+ )
237
+ # print rel_pose_error on image
238
+ plt_matches = cv2.putText(
239
+ plt_matches,
240
+ f"rel_pose_error: {result['rel_pose_error']:.2f} num_inliers: {result['ransac_inl']} inl_ratio: {result['ransac_inl_ratio']:.2f} num_matches: {len(result['cv_matches'])} num_keypoints: {len(result['cv_keypoints_src'])}/{len(result['cv_keypoints_trg'])}",
241
+ (10, 30),
242
+ cv2.FONT_HERSHEY_SIMPLEX,
243
+ 1,
244
+ (0, 0, 0),
245
+ 2,
246
+ cv2.LINE_8,
247
+ )
248
+
249
+ plot_data.append({"file_name": file_name, "image": plt_matches})
250
+
251
+ if logger is None:
252
+ log.info("No logger provided. Using plt to plot results.")
253
+ for image in plot_data:
254
+ plt.imsave(
255
+ image["file_name"],
256
+ cv_resize_and_pad_to_shape(image["image"], (1024, 2048)),
257
+ )
258
+ plt.close()
259
+ else:
260
+ import wandb
261
+
262
+ log.info(f"Logging images to wandb with step={step}")
263
+ if not downsampled:
264
+ logger.log(
265
+ {
266
+ "examples": [
267
+ wandb.Image(cv_resize_and_pad_to_shape(image["image"], (1024, 2048))) for image in plot_data
268
+ ]
269
+ },
270
+ step=step,
271
+ )
272
+ else:
273
+ logger.log(
274
+ {
275
+ "examples_downsampled": [
276
+ wandb.Image(cv_resize_and_pad_to_shape(image["image"], (1024, 2048))) for image in plot_data
277
+ ]
278
+ },
279
+ step=step,
280
+ )
281
+
282
+ def log_results(self, logger=None, step=None, downsampled=False):
283
+ if len(self.results) == 0:
284
+ raise ValueError("No results to log. Run evaluate first.")
285
+
286
+ summary_results = self.calc_auc(downsampled=downsampled)
287
+
288
+ if logger is not None:
289
+ logger.log(summary_results, step=step)
290
+ else:
291
+ log.warning("No logger provided. Printing results instead.")
292
+ print(self.calc_auc())
293
+
294
+ def print_results(self):
295
+ if len(self.results) == 0:
296
+ raise ValueError("No results to print. Run evaluate first.")
297
+
298
+ print(self.calc_auc())
299
+
300
+ def calc_auc(self, auc_thresholds=None, downsampled=False):
301
+ if auc_thresholds is None:
302
+ auc_thresholds = [5, 10, 20]
303
+ if not isinstance(auc_thresholds, list):
304
+ auc_thresholds = [auc_thresholds]
305
+
306
+ if len(self.results) == 0:
307
+ raise ValueError("No results to calculate auc. Run evaluate first.")
308
+
309
+ rel_pose_errors = [r["rel_pose_error"] for r in self.results]
310
+
311
+ pose_aucs = AUCMetric(auc_thresholds, rel_pose_errors).compute()
312
+ assert isinstance(pose_aucs, list) and len(pose_aucs) == len(auc_thresholds)
313
+
314
+ ext = "_downsampled" if downsampled else "_original"
315
+
316
+ summary = {}
317
+ for i, ath in enumerate(auc_thresholds):
318
+ summary[f"rel_pose_error@{ath}°_{ext}"] = pose_aucs[i]
319
+
320
+ return summary
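A hedged sketch of driving this benchmark outside the training loop. It assumes the DISK `disk-data` layout has been downloaded (see `data/download_disk_data.sh`) and that `DATA_DIR` points at its parent directory; the `conf_inference` dict mirrors the values in `conf/train.yaml`, and the data path below is a placeholder.

```python
# Minimal usage sketch for IMW_2020_Benchmark; paths are placeholders.
import os

import torch

from ripe import vgg_hyper
from ripe.benchmarks.imw_2020 import IMW_2020_Benchmark

os.environ.setdefault("DATA_DIR", "/path/to/data")  # must contain disk-data/imw2020-val

dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = vgg_hyper().to(dev)

benchmark = IMW_2020_Benchmark(
    use_predefined_subset=True,
    conf_inference={"threshold": 0.5, "top_k": 2048},
)
benchmark.evaluate(model, dev, progress_bar=True)
benchmark.print_results()              # pose AUC at 5/10/20 degrees
print(benchmark.get_auc(threshold=5))  # single AUC value on the original-resolution images
```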
imcui/third_party/RIPE/ripe/data/__init__.py ADDED
File without changes
imcui/third_party/RIPE/ripe/data/data_transforms.py ADDED
@@ -0,0 +1,204 @@
1
+ import collections
2
+ import collections.abc
3
+
4
+ import kornia.geometry as KG
5
+ import numpy as np
6
+ import torch
7
+ from torchvision.transforms import functional as TF
8
+
9
+
10
+ class Compose:
11
+ """Composes several transforms together. The transforms are applied in the order they are passed in.
12
+ Args: transforms (list): A list of transforms to be applied.
13
+ """
14
+
15
+ def __init__(self, transforms):
16
+ self.transforms = transforms
17
+
18
+ def __call__(self, src, trg, src_mask, trg_mask, h):
19
+ for t in self.transforms:
20
+ src, trg, src_mask, trg_mask, h = t(src, trg, src_mask, trg_mask, h)
21
+
22
+ return src, trg, src_mask, trg_mask, h
23
+
24
+
25
+ class Transform:
26
+ """Base class for all transforms. It provides a method to apply a transformation function to the input images and masks.
27
+ Args:
28
+ src (torch.Tensor): The source image tensor.
29
+ trg (torch.Tensor): The target image tensor.
30
+ src_mask (torch.Tensor): The source image mask tensor.
31
+ trg_mask (torch.Tensor): The target image mask tensor.
32
+ h (torch.Tensor): The homography matrix tensor.
33
+ Returns:
34
+ tuple: A tuple containing the transformed source image, the transformed target image, the transformed source mask,
35
+ the transformed target mask and the updated homography matrix.
36
+ """
37
+
38
+ def __init__(self):
39
+ pass
40
+
41
+ def apply_transform(self, src, trg, src_mask, trg_mask, h, transfrom_function):
42
+ src, trg, src_mask, trg_mask, h = transfrom_function(src, trg, src_mask, trg_mask, h)
43
+ return src, trg, src_mask, trg_mask, h
44
+
45
+
46
+ class Normalize(Transform):
47
+ def __init__(self, mean, std):
48
+ self.mean = mean
49
+ self.std = std
50
+
51
+ def __call__(self, src, trg, src_mask, trg_mask, h):
52
+ return self.apply_transform(src, trg, src_mask, trg_mask, h, self.transform_function)
53
+
54
+ def transform_function(self, src, trg, src_mask, trg_mask, h):
55
+ src = TF.normalize(src, mean=self.mean, std=self.std)
56
+ trg = TF.normalize(trg, mean=self.mean, std=self.std)
57
+ return src, trg, src_mask, trg_mask, h
58
+
59
+
60
+ class ResizeAndPadWithHomography(Transform):
61
+ def __init__(self, target_size_longer_side=768):
62
+ self.target_size = target_size_longer_side
63
+
64
+ def __call__(self, src, trg, src_mask, trg_mask, h):
65
+ return self.apply_transform(src, trg, src_mask, trg_mask, h, self.transform_function)
66
+
67
+ def transform_function(self, src, trg, src_mask, trg_mask, h):
68
+ src_w, src_h = src.shape[-1], src.shape[-2]
69
+ trg_w, trg_h = trg.shape[-1], trg.shape[-2]
70
+
71
+ # Resizing logic for both images
72
+ scale_src, new_src_w, new_src_h = self.compute_resize(src_w, src_h)
73
+ scale_trg, new_trg_w, new_trg_h = self.compute_resize(trg_w, trg_h)
74
+
75
+ # Resize both images
76
+ src_resized = TF.resize(src, [new_src_h, new_src_w])
77
+ trg_resized = TF.resize(trg, [new_trg_h, new_trg_w])
78
+
79
+ src_mask_resized = TF.resize(src_mask, [new_src_h, new_src_w])
80
+ trg_mask_resized = TF.resize(trg_mask, [new_trg_h, new_trg_w])
81
+
82
+ # Pad the resized images to be square (768x768)
83
+ src_padded, src_padding = self.apply_padding(src_resized, new_src_w, new_src_h)
84
+ trg_padded, trg_padding = self.apply_padding(trg_resized, new_trg_w, new_trg_h)
85
+
86
+ src_mask_padded, _ = self.apply_padding(src_mask_resized, new_src_w, new_src_h)
87
+ trg_mask_padded, _ = self.apply_padding(trg_mask_resized, new_trg_w, new_trg_h)
88
+
89
+ # Update the homography matrix
90
+ h = self.update_homography(h, scale_src, src_padding, scale_trg, trg_padding)
91
+
92
+ return src_padded, trg_padded, src_mask_padded, trg_mask_padded, h
93
+
94
+ def compute_resize(self, w, h):
95
+ if w > h:
96
+ scale = self.target_size / w
97
+ new_w = self.target_size
98
+ new_h = int(h * scale)
99
+ else:
100
+ scale = self.target_size / h
101
+ new_h = self.target_size
102
+ new_w = int(w * scale)
103
+ return scale, new_w, new_h
104
+
105
+ def apply_padding(self, img, new_w, new_h):
106
+ pad_w = (self.target_size - new_w) // 2
107
+ pad_h = (self.target_size - new_h) // 2
108
+ padding = [
109
+ pad_w,
110
+ pad_h,
111
+ self.target_size - new_w - pad_w,
112
+ self.target_size - new_h - pad_h,
113
+ ]
114
+ img_padded = TF.pad(img, padding, fill=0) # Zero-pad
115
+ return img_padded, padding
116
+
117
+ def update_homography(self, h, scale_src, padding_src, scale_trg, padding_trg):
118
+ # Create the scaling matrices
119
+ scale_matrix_src = np.array([[scale_src, 0, 0], [0, scale_src, 0], [0, 0, 1]])
120
+ scale_matrix_trg = np.array([[scale_trg, 0, 0], [0, scale_trg, 0], [0, 0, 1]])
121
+
122
+ # Create the padding translation matrices
123
+ pad_matrix_src = np.array([[1, 0, padding_src[0]], [0, 1, padding_src[1]], [0, 0, 1]])
124
+ pad_matrix_trg = np.array([[1, 0, -padding_trg[0]], [0, 1, -padding_trg[1]], [0, 0, 1]])
125
+
126
+ # Update the homography: apply scaling and translation
127
+ h_updated = (
128
+ pad_matrix_trg
129
+ @ scale_matrix_trg
130
+ @ h.numpy()
131
+ @ np.linalg.inv(scale_matrix_src)
132
+ @ np.linalg.inv(pad_matrix_src)
133
+ )
134
+
135
+ return torch.from_numpy(h_updated).float()
136
+
137
+
138
+ class Resize(Transform):
139
+ def __init__(self, output_size, edge_divisible_by=None, side="long", antialias=True):
140
+ self.output_size = output_size
141
+ self.edge_divisible_by = edge_divisible_by
142
+ self.side = side
143
+ self.antialias = antialias
144
+
145
+ def __call__(self, src, trg, src_mask, trg_mask, h):
146
+ return self.apply_transform(src, trg, src_mask, trg_mask, h, self.transform_function)
147
+
148
+ def transform_function(self, src, trg, src_mask, trg_mask, h):
149
+ new_size_src = self.get_new_image_size(src)
150
+ new_size_trg = self.get_new_image_size(trg)
151
+
152
+ src, T_src = self.resize(src, new_size_src)
153
+ trg, T_trg = self.resize(trg, new_size_trg)
154
+
155
+ src_mask, _ = self.resize(src_mask, new_size_src)
156
+ trg_mask, _ = self.resize(trg_mask, new_size_trg)
157
+
158
+ h = torch.from_numpy(T_trg @ h.numpy() @ T_src).float()
159
+
160
+ return src, trg, src_mask, trg_mask, h
161
+
162
+ def resize(self, img, size):
163
+ h, w = img.shape[-2:]
164
+
165
+ img = KG.transform.resize(
166
+ img,
167
+ size,
168
+ side=self.side,
169
+ antialias=self.antialias,
170
+ align_corners=None,
171
+ interpolation="bilinear",
172
+ )
173
+
174
+ scale = torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
175
+ T = np.diag([scale[0].item(), scale[1].item(), 1])
176
+
177
+ return img, T
178
+
179
+ def get_new_image_size(self, img):
180
+ h, w = img.shape[-2:]
181
+
182
+ if isinstance(self.output_size, collections.abc.Iterable):
183
+ assert len(self.output_size) == 2
184
+ return tuple(self.output_size)
185
+ if self.output_size is None: # keep the original size, but possibly make it divisible by edge_divisible_by
186
+ size = (h, w)
187
+ else:
188
+ side_size = self.output_size
189
+ aspect_ratio = w / h
190
+ if self.side not in ("short", "long", "vert", "horz"):
191
+ raise ValueError(f"side can be one of 'short', 'long', 'vert', and 'horz'. Got '{self.side}'")
192
+ if self.side == "vert":
193
+ size = side_size, int(side_size * aspect_ratio)
194
+ elif self.side == "horz":
195
+ size = int(side_size / aspect_ratio), side_size
196
+ elif (self.side == "short") ^ (aspect_ratio < 1.0):
197
+ size = side_size, int(side_size * aspect_ratio)
198
+ else:
199
+ size = int(side_size / aspect_ratio), side_size
200
+
201
+ if self.edge_divisible_by is not None:
202
+ df = self.edge_divisible_by
203
+ size = list(map(lambda x: int(x // df * df), size))
204
+ return size
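All transforms above take and return the five-tuple (src, trg, src_mask, trg_mask, h), so the homography stays consistent with any resizing or padding. A small synthetic sketch with random tensors and illustrative values:

```python
# Synthetic sketch of the paired-transform pipeline defined above.
import torch

from ripe.data.data_transforms import Compose, Normalize, Resize

transforms = Compose(
    [
        Resize(None, edge_divisible_by=16),  # keep the size, round edges down to a multiple of 16
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

src = torch.rand(3, 480, 640)
trg = torch.rand(3, 600, 800)
src_mask = torch.ones(1, 480, 640)
trg_mask = torch.ones(1, 600, 800)
h = torch.eye(3)  # identity homography for a positive pair

src, trg, src_mask, trg_mask, h = transforms(src, trg, src_mask, trg_mask, h)
print(src.shape, trg.shape)  # (3, 480, 640) and (3, 592, 800)
print(h)                     # homography rescaled to the new target resolution
```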
imcui/third_party/RIPE/ripe/data/datasets/__init__.py ADDED
File without changes
imcui/third_party/RIPE/ripe/data/datasets/acdc.py ADDED
@@ -0,0 +1,154 @@
1
+ from pathlib import Path
2
+ from typing import Any, Callable, Dict, Optional
3
+
4
+ import torch
5
+ from torch.utils.data import Dataset
6
+ from torchvision.io import read_image
7
+
8
+ from ripe import utils
9
+ from ripe.data.data_transforms import Compose
10
+ from ripe.utils.utils import get_other_random_id
11
+
12
+ log = utils.get_pylogger(__name__)
13
+
14
+
15
+ class ACDC(Dataset):
16
+ def __init__(
17
+ self,
18
+ root: Path,
19
+ stage: str = "train",
20
+ condition: str = "rain",
21
+ transforms: Optional[Callable] = None,
22
+ positive_only: bool = False,
23
+ ) -> None:
24
+ self.root = root
25
+ self.stage = stage
26
+ self.condition = condition
27
+ self.transforms = transforms
28
+ self.positive_only = positive_only
29
+
30
+ if isinstance(self.root, str):
31
+ self.root = Path(self.root)
32
+
33
+ if not self.root.exists():
34
+ raise FileNotFoundError(f"Dataset not found at {self.root}")
35
+
36
+ if transforms is None:
37
+ self.transforms = Compose([])
38
+ else:
39
+ self.transforms = transforms
40
+
41
+ if self.stage not in ["train", "val", "test", "pred"]:
42
+ raise RuntimeError(
43
+ "Unknown option "
44
+ + self.stage
45
+ + " as training stage variable. Valid options: 'train', 'val', 'test' and 'pred'"
46
+ )
47
+
48
+ if self.stage == "pred": # prediction uses the test set
49
+ self.stage = "test"
50
+
51
+ if self.stage in ["val", "test", "pred"]:
52
+ self.positive_only = True
53
+ log.info(f"{self.stage} stage: Using only positive pairs!")
54
+
55
+ weather_conditions = ["fog", "night", "rain", "snow"]
56
+
57
+ if self.condition not in weather_conditions + ["all"]:
58
+ raise RuntimeError(
59
+ "Unknown option "
60
+ + self.condition
61
+ + " as weather condition variable. Valid options: 'fog', 'night', 'rain', 'snow' and 'all'"
62
+ )
63
+
64
+ self.weather_condition_query = weather_conditions if self.condition == "all" else [self.condition]
65
+
66
+ self._read_sample_files()
67
+
68
+ if positive_only:
69
+ log.warning("Using only positive pairs!")
70
+ log.info(f"Found {len(self.src_images)} source images and {len(self.trg_images)} target images.")
71
+
72
+ def _read_sample_files(self):
73
+ file_name_pattern_ref = "_ref_anon.png"
74
+ file_name_pattern = "_rgb_anon.png"
75
+
76
+ self.trg_images = []
77
+ self.src_images = []
78
+
79
+ for weather_condition in self.weather_condition_query:
80
+ rgb_files = sorted(
81
+ list(self.root.glob("rgb_anon/" + weather_condition + "/" + self.stage + "/**/*" + file_name_pattern)),
82
+ key=lambda i: i.stem[:21],
83
+ )
84
+
85
+ src_images = sorted(
86
+ list(
87
+ self.root.glob(
88
+ "rgb_anon/" + weather_condition + "/" + self.stage + "_ref" + "/**/*" + file_name_pattern_ref
89
+ )
90
+ ),
91
+ key=lambda i: i.stem[:21],
92
+ )
93
+
94
+ self.trg_images += rgb_files
95
+ self.src_images += src_images
96
+
97
+ def __len__(self) -> int:
98
+ if self.positive_only:
99
+ return len(self.trg_images)
100
+ return 2 * len(self.trg_images)
101
+
102
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
103
+ sample: Any = {}
104
+
105
+ positive_sample = (idx % 2 == 0) or (self.positive_only)
106
+ if not self.positive_only:
107
+ idx = idx // 2
108
+
109
+ sample["label"] = positive_sample
110
+
111
+ if positive_sample:
112
+ sample["src_path"] = str(self.src_images[idx])
113
+ sample["trg_path"] = str(self.trg_images[idx])
114
+
115
+ assert self.src_images[idx].stem[:21] == self.trg_images[idx].stem[:21], (
116
+ f"Source and target image mismatch: {self.src_images[idx]} vs {self.trg_images[idx]}"
117
+ )
118
+
119
+ src_img = read_image(sample["src_path"])
120
+ trg_img = read_image(sample["trg_path"])
121
+
122
+ homography = torch.eye(3, dtype=torch.float32)
123
+ else:
124
+ sample["src_path"] = str(self.src_images[idx])
125
+ idx_other = get_other_random_id(idx, len(self) // 2)
126
+ sample["trg_path"] = str(self.trg_images[idx_other])
127
+
128
+ assert self.src_images[idx].stem[:21] != self.trg_images[idx_other].stem[:21], (
129
+ f"Source and target image match for negative sample: {self.src_images[idx]} vs {self.trg_images[idx_other]}"
130
+ )
131
+
132
+ src_img = read_image(sample["src_path"])
133
+ trg_img = read_image(sample["trg_path"])
134
+
135
+ homography = torch.zeros((3, 3), dtype=torch.float32)
136
+
137
+ src_img = src_img / 255.0
138
+ trg_img = trg_img / 255.0
139
+
140
+ _, H, W = src_img.shape
141
+
142
+ src_mask = torch.ones((1, H, W), dtype=torch.uint8)
143
+ trg_mask = torch.ones((1, H, W), dtype=torch.uint8)
144
+
145
+ if self.transforms:
146
+ src_img, trg_img, src_mask, trg_mask, _ = self.transforms(src_img, trg_img, src_mask, trg_mask, homography)
147
+
148
+ sample["src_image"] = src_img
149
+ sample["trg_image"] = trg_img
150
+ sample["src_mask"] = src_mask.to(torch.bool)
151
+ sample["trg_mask"] = trg_mask.to(torch.bool)
152
+ sample["homography"] = homography
153
+
154
+ return sample
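For reference, a hypothetical instantiation of this dataset with the transform stack from `conf/data/megadepth+acdc.yaml`; the root path is a placeholder and the ACDC images must already be on disk.

```python
# Hypothetical ACDC usage sketch; root is a placeholder path.
from torch.utils.data import DataLoader

from ripe.data.data_transforms import Compose, Normalize, ResizeAndPadWithHomography
from ripe.data.datasets.acdc import ACDC

dataset = ACDC(
    root="/path/to/ACDC",   # placeholder
    stage="train",
    condition="all",        # 'fog', 'night', 'rain', 'snow' or 'all'
    transforms=Compose(
        [
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ResizeAndPadWithHomography(target_size_longer_side=560),
        ]
    ),
)

loader = DataLoader(dataset, batch_size=6, num_workers=8, shuffle=True)
sample = dataset[0]  # dict with src/trg images, masks, homography and the positive/negative label
```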
imcui/third_party/RIPE/ripe/data/datasets/dataset_combinator.py ADDED
@@ -0,0 +1,88 @@
1
+ import torch
2
+
3
+ from ripe import utils
4
+
5
+ log = utils.get_pylogger(__name__)
6
+
7
+
8
+ class DatasetCombinator:
9
+ """Combines multiple datasets into one. Length of the combined dataset is the length of the
10
+ longest dataset. Shorter datasets are looped over.
11
+
12
+ Args:
13
+ datasets: List of datasets to combine.
14
+ mode: How to sample from the datasets. Can be either "uniform" or "weighted".
15
+ In "uniform" mode, each dataset is sampled with equal probability.
16
+ In "weighted" mode, each dataset is sampled with probability proportional to its length.
17
+ """
18
+
19
+ def __init__(self, datasets, mode="uniform", weights=None):
20
+ self.datasets = datasets
21
+
22
+ names_datasets = [type(ds).__name__ for ds in self.datasets]
23
+ self.lengths = [len(ds) for ds in datasets]
24
+
25
+ if mode == "weighted":
26
+ self.probs_datasets = [length / sum(self.lengths) for length in self.lengths]
27
+ elif mode == "uniform":
28
+ self.probs_datasets = [1 / len(self.datasets) for _ in self.datasets]
29
+ elif mode == "custom":
30
+ assert weights is not None, "Weights must be provided in custom mode"
31
+ assert len(weights) == len(datasets), "Number of weights must match number of datasets"
32
+ assert sum(weights) == 1.0, "Weights must sum to 1"
33
+ self.probs_datasets = weights
34
+ else:
35
+ raise ValueError(f"Unknown mode {mode}")
36
+
37
+ log.info("Got the following datasets: ")
38
+
39
+ for name, length, prob in zip(names_datasets, self.lengths, self.probs_datasets):
40
+ log.info(f"{name} with {length} samples and probability {prob}")
41
+ log.info(f"Total number of samples: {sum(self.lengths)}")
42
+
43
+ self.num_samples = max(self.lengths)
44
+
45
+ self.dataset_dist = torch.distributions.Categorical(probs=torch.tensor(self.probs_datasets))
46
+
47
+ def __len__(self):
48
+ return self.num_samples
49
+
50
+ def __getitem__(self, idx: int):
51
+ positive_sample = idx % 2 == 0
52
+
53
+ if positive_sample:
54
+ dataset_idx = self.dataset_dist.sample().item()
55
+
56
+ idx = torch.randint(0, self.lengths[dataset_idx], (1,)).item()
57
+ while idx % 2 == 1:
58
+ idx = torch.randint(0, self.lengths[dataset_idx], (1,)).item()
59
+
60
+ return self.datasets[dataset_idx][idx]
61
+ else:
62
+ dataset_idx_1 = self.dataset_dist.sample().item()
63
+ dataset_idx_2 = self.dataset_dist.sample().item()
64
+
65
+ if dataset_idx_1 == dataset_idx_2:
66
+ idx = torch.randint(0, self.lengths[dataset_idx_1], (1,)).item()
67
+ while idx % 2 == 0:
68
+ idx = torch.randint(0, self.lengths[dataset_idx_1], (1,)).item()
69
+ return self.datasets[dataset_idx_1][idx]
70
+
71
+ else:
72
+ idx_1 = torch.randint(0, self.lengths[dataset_idx_1], (1,)).item()
73
+ idx_2 = torch.randint(0, self.lengths[dataset_idx_2], (1,)).item()
74
+
75
+ sample_1 = self.datasets[dataset_idx_1][idx_1]
76
+ sample_2 = self.datasets[dataset_idx_2][idx_2]
77
+
78
+ sample = {
79
+ "label": False,
80
+ "src_path": sample_1["src_path"],
81
+ "trg_path": sample_2["trg_path"],
82
+ "src_image": sample_1["src_image"],
83
+ "trg_image": sample_2["trg_image"],
84
+ "src_mask": sample_1["src_mask"],
85
+ "trg_mask": sample_2["trg_mask"],
86
+ "homography": sample_2["homography"],
87
+ }
88
+ return sample
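A self-contained sketch of the combinator with two toy datasets standing in for the real MegaDepth/ACDC/Tokyo datasets (which need data on disk); `ToyPairs` is a hypothetical stand-in that only mimics the sample dict the combinator expects.

```python
# Hedged sketch: combine two stand-in datasets and sample from them.
import torch
from torch.utils.data import Dataset

from ripe.data.datasets.dataset_combinator import DatasetCombinator


class ToyPairs(Dataset):
    """Stand-in dataset producing the sample dict the combinator expects (even idx = positive pair)."""

    def __init__(self, n):
        self.n = n

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        positive = idx % 2 == 0
        return {
            "label": positive,
            "src_path": f"src_{idx}.png",
            "trg_path": f"trg_{idx}.png",
            "src_image": torch.rand(3, 560, 560),
            "trg_image": torch.rand(3, 560, 560),
            "src_mask": torch.ones(1, 560, 560, dtype=torch.bool),
            "trg_mask": torch.ones(1, 560, 560, dtype=torch.bool),
            "homography": torch.eye(3) if positive else torch.zeros(3, 3),
        }


combined = DatasetCombinator([ToyPairs(100), ToyPairs(40)], mode="weighted")
print(len(combined))    # length of the longest dataset (100)
positive = combined[0]  # even index: positive pair drawn from one dataset
negative = combined[1]  # odd index: negative pair, possibly mixing the two datasets
```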
imcui/third_party/RIPE/ripe/data/datasets/disk_imw.py ADDED
@@ -0,0 +1,160 @@
1
+ import json
2
+ import random
3
+ from itertools import accumulate
4
+ from pathlib import Path
5
+ from typing import Any, Callable, Dict, Optional, Tuple
6
+
7
+ import torch
8
+ from torch.utils.data import Dataset
9
+ from torchvision.io import read_image
10
+
11
+ from ripe import utils
12
+ from ripe.data.data_transforms import Compose
13
+ from ripe.utils.image_utils import Camera, cameras2F
14
+
15
+ log = utils.get_pylogger(__name__)
16
+
17
+
18
+ class DISK_IMW(Dataset):
19
+ def __init__(
20
+ self,
21
+ root: str,
22
+ stage: str = "val",
23
+ # condition: str = "rain",
24
+ transforms: Optional[Callable] = None,
25
+ ) -> None:
26
+ self.root = root
27
+ self.stage = stage
28
+ self.transforms = transforms
29
+
30
+ if isinstance(self.root, str):
31
+ self.root = Path(self.root)
32
+
33
+ if not self.root.exists():
34
+ raise FileNotFoundError(f"Dataset not found at {self.root}")
35
+
36
+ if transforms is None:
37
+ self.transforms = Compose([])
38
+ else:
39
+ self.transforms = transforms
40
+
41
+ if self.stage not in ["val"]:
42
+ raise RuntimeError("Unknown option " + self.stage + " as training stage variable. Valid options: 'val'")
43
+
44
+ json_path = self.root / "imw2020-val" / "dataset.json"
45
+ with open(json_path) as json_file:
46
+ json_data = json.load(json_file)
47
+
48
+ self.scenes = []
49
+
50
+ for scene in json_data:
51
+ self.scenes.append(Scene(self.root / "imw2020-val", json_data[scene]))
52
+
53
+ self.tuples_per_scene = [len(scene) for scene in self.scenes]
54
+
55
+ def __len__(self) -> int:
56
+ return sum(self.tuples_per_scene)
57
+
58
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
59
+ sample: Any = {}
60
+
61
+ i_scene, i_image = self._get_scene_and_image_id_from_idx(idx)
62
+
63
+ sample["src_path"], sample["trg_path"], path_calib_src, path_calib_trg = self.scenes[i_scene][i_image]
64
+
65
+ cam_src = Camera.from_calibration_file(path_calib_src)
66
+ cam_trg = Camera.from_calibration_file(path_calib_trg)
67
+
68
+ F = self.get_F(cam_src, cam_trg)
69
+ s2t_R, s2t_T = self.get_relative_pose(cam_src, cam_trg)
70
+
71
+ src_img = read_image(sample["src_path"]) / 255.0
72
+ trg_img = read_image(sample["trg_path"]) / 255.0
73
+
74
+ _, H_src, W_src = src_img.shape
75
+ _, H_trg, W_trg = trg_img.shape
76
+
77
+ src_mask = torch.ones((1, H_src, W_src), dtype=torch.uint8)
78
+ trg_mask = torch.ones((1, H_trg, W_trg), dtype=torch.uint8)
79
+
80
+ H = torch.eye(3)
81
+ if self.transforms:
82
+ src_img, trg_img, src_mask, trg_mask, _ = self.transforms(src_img, trg_img, src_mask, trg_mask, H)
83
+
84
+ # check if transformations in self.transforms. Only Normalize is allowed
85
+ for t in self.transforms.transforms:
86
+ if t.__class__.__name__ not in ["Normalize", "Resize"]:
87
+ raise ValueError(f"Transform {t.__class__.__name__} not allowed in DISK_IMW dataset")
88
+
89
+ sample["src_image"] = src_img
90
+ sample["trg_image"] = trg_img
91
+ sample["orig_size_src"] = (H_src, W_src)
92
+ sample["orig_size_trg"] = (H_trg, W_trg)
93
+ sample["src_mask"] = src_mask.to(torch.bool)
94
+ sample["trg_mask"] = trg_mask.to(torch.bool)
95
+ sample["F"] = F
96
+ sample["s2t_R"] = s2t_R
97
+ sample["s2t_T"] = s2t_T
98
+ sample["src_camera"] = cam_src
99
+ sample["trg_camera"] = cam_trg
100
+
101
+ return sample
102
+
103
+ def get_relative_pose(self, cam_src: Camera, cam_trg: Camera) -> Tuple[torch.Tensor, torch.Tensor]:
104
+ R = cam_trg.R @ cam_src.R.T
105
+ T = cam_trg.t - R @ cam_src.t
106
+
107
+ return R, T
108
+
109
+ def get_F(self, cam_src: Camera, cam_trg: Camera) -> torch.Tensor:
110
+ F = cameras2F(cam_src, cam_trg)
111
+
112
+ return F
113
+
114
+ def _get_scene_and_image_id_from_idx(self, idx: int) -> Tuple[int, int]:
115
+ accumulated_tuples = accumulate(self.tuples_per_scene)
116
+
117
+ if idx >= sum(self.tuples_per_scene):
118
+ raise IndexError(f"Index {idx} out of bounds")
119
+
120
+ idx_scene = None
121
+ for i, accumulated_tuple in enumerate(accumulated_tuples):
122
+ idx_scene = i
123
+ if idx < accumulated_tuple:
124
+ break
125
+
126
+ idx_image = idx - sum(self.tuples_per_scene[:idx_scene])
127
+
128
+ return idx_scene, idx_image
129
+
130
+ def _get_other_random_scene_and_image_id(self, scene_id_to_exclude: int) -> Tuple[int, int]:
131
+ possible_scene_ids = list(range(len(self.scenes)))
132
+ possible_scene_ids.remove(scene_id_to_exclude)
133
+
134
+ idx_scene = random.choice(possible_scene_ids)
135
+ idx_image = random.randint(0, len(self.scenes[idx_scene]) - 1)
136
+
137
+ return idx_scene, idx_image
138
+
139
+
140
+ class Scene:
141
+ def __init__(self, root_path, scene_data: Dict[str, Any]) -> None:
142
+ self.root_path = root_path
143
+ self.image_path = Path(scene_data["image_path"])
144
+ self.calib_path = Path(scene_data["calib_path"])
145
+ self.image_names = scene_data["images"]
146
+ self.tuples = scene_data["tuples"]
147
+
148
+ def __len__(self) -> int:
149
+ return len(self.tuples)
150
+
151
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
152
+ idx_1 = self.tuples[idx][0]
153
+ idx_2 = self.tuples[idx][1]
154
+
155
+ path_image_1 = str(self.root_path / self.image_path / self.image_names[idx_1]) + ".jpg"
156
+ path_image_2 = str(self.root_path / self.image_path / self.image_names[idx_2]) + ".jpg"
157
+ path_calib_1 = str(self.root_path / self.calib_path / ("calibration_" + self.image_names[idx_1])) + ".h5"
158
+ path_calib_2 = str(self.root_path / self.calib_path / ("calibration_" + self.image_names[idx_2])) + ".h5"
159
+
160
+ return path_image_1, path_image_2, path_calib_1, path_calib_2
imcui/third_party/RIPE/ripe/data/datasets/disk_megadepth.py ADDED
@@ -0,0 +1,157 @@
1
+ import json
2
+ import random
3
+ from itertools import accumulate
4
+ from pathlib import Path
5
+ from typing import Any, Callable, Dict, Optional, Tuple
6
+
7
+ import torch
8
+ from torch.utils.data import Dataset
9
+ from torchvision.io import read_image
10
+
11
+ from ripe import utils
12
+ from ripe.data.data_transforms import Compose
13
+
14
+ log = utils.get_pylogger(__name__)
15
+
16
+
17
+ class DISK_Megadepth(Dataset):
18
+ def __init__(
19
+ self,
20
+ root: str,
21
+ max_scene_size: int,
22
+ stage: str = "train",
23
+ # condition: str = "rain",
24
+ transforms: Optional[Callable] = None,
25
+ positive_only: bool = False,
26
+ ) -> None:
27
+ self.root = root
28
+ self.stage = stage
29
+ self.transforms = transforms
30
+ self.positive_only = positive_only
31
+
32
+ if isinstance(self.root, str):
33
+ self.root = Path(self.root)
34
+
35
+ if not self.root.exists():
36
+ raise FileNotFoundError(f"Dataset not found at {self.root}")
37
+
38
+ if transforms is None:
39
+ self.transforms = Compose([])
40
+ else:
41
+ self.transforms = transforms
42
+
43
+ if self.stage not in ["train"]:
44
+ raise RuntimeError("Unknown option " + self.stage + " as training stage variable. Valid options: 'train'")
45
+
46
+ json_path = self.root / "megadepth" / "dataset.json"
47
+ with open(json_path) as json_file:
48
+ json_data = json.load(json_file)
49
+
50
+ self.scenes = []
51
+
52
+ for scene in json_data:
53
+ self.scenes.append(Scene(self.root / "megadepth", json_data[scene], max_scene_size))
54
+
55
+ self.tuples_per_scene = [len(scene) for scene in self.scenes]
56
+
57
+ if positive_only:
58
+ log.warning("Using only positive pairs!")
59
+
60
+ def __len__(self) -> int:
61
+ if self.positive_only:
62
+ return sum(self.tuples_per_scene)
63
+ return 2 * sum(self.tuples_per_scene)
64
+
65
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
66
+ sample: Any = {}
67
+
68
+ positive_sample = idx % 2 == 0 or self.positive_only
69
+ if not self.positive_only:
70
+ idx = idx // 2
71
+
72
+ sample["label"] = positive_sample
73
+
74
+ i_scene, i_image = self._get_scene_and_image_id_from_idx(idx)
75
+
76
+ if positive_sample:
77
+ sample["src_path"], sample["trg_path"] = self.scenes[i_scene][i_image]
78
+
79
+ homography = torch.eye(3, dtype=torch.float32)
80
+ else:
81
+ sample["src_path"], _ = self.scenes[i_scene][i_image]
82
+
83
+ i_scene_other, i_image_other = self._get_other_random_scene_and_image_id(i_scene)
84
+
85
+ sample["trg_path"], _ = self.scenes[i_scene_other][i_image_other]
86
+
87
+ homography = torch.zeros((3, 3), dtype=torch.float32)
88
+
89
+ src_img = read_image(sample["src_path"]) / 255.0
90
+ trg_img = read_image(sample["trg_path"]) / 255.0
91
+
92
+ _, H_src, W_src = src_img.shape
93
+ _, H_trg, W_trg = trg_img.shape
94
+
95
+ src_mask = torch.ones((1, H_src, W_src), dtype=torch.uint8)
96
+ trg_mask = torch.ones((1, H_trg, W_trg), dtype=torch.uint8)
97
+
98
+ if self.transforms:
99
+ src_img, trg_img, src_mask, trg_mask, _ = self.transforms(src_img, trg_img, src_mask, trg_mask, homography)
100
+
101
+ sample["src_image"] = src_img
102
+ sample["trg_image"] = trg_img
103
+ sample["src_mask"] = src_mask.to(torch.bool)
104
+ sample["trg_mask"] = trg_mask.to(torch.bool)
105
+ sample["homography"] = homography
106
+
107
+ return sample
108
+
109
+ def _get_scene_and_image_id_from_idx(self, idx: int) -> Tuple[int, int]:
110
+ accumulated_tuples = accumulate(self.tuples_per_scene)
111
+
112
+ if idx >= sum(self.tuples_per_scene):
113
+ raise IndexError(f"Index {idx} out of bounds")
114
+
115
+ idx_scene = None
116
+ for i, accumulated_tuple in enumerate(accumulated_tuples):
117
+ idx_scene = i
118
+ if idx < accumulated_tuple:
119
+ break
120
+
121
+ idx_image = idx - sum(self.tuples_per_scene[:idx_scene])
122
+
123
+ return idx_scene, idx_image
124
+
125
+ def _get_other_random_scene_and_image_id(self, scene_id_to_exclude: int) -> Tuple[int, int]:
126
+ possible_scene_ids = list(range(len(self.scenes)))
127
+ possible_scene_ids.remove(scene_id_to_exclude)
128
+
129
+ idx_scene = random.choice(possible_scene_ids)
130
+ idx_image = random.randint(0, len(self.scenes[idx_scene]) - 1)
131
+
132
+ return idx_scene, idx_image
133
+
134
+
135
+ class Scene:
136
+ def __init__(self, root_path, scene_data: Dict[str, Any], max_size_scene) -> None:
137
+ self.root_path = root_path
138
+ self.image_path = Path(scene_data["image_path"])
139
+ self.image_names = scene_data["images"]
140
+
141
+ # randomly sample tuples
142
+ if max_size_scene > 0:
143
+ self.tuples = random.sample(scene_data["tuples"], min(max_size_scene, len(scene_data["tuples"])))
+ else: # no cap given: keep all tuples of the scene
+ self.tuples = scene_data["tuples"]
144
+
145
+ def __len__(self) -> int:
146
+ return len(self.tuples)
147
+
148
+ def __getitem__(self, idx: int) -> Tuple[str, str]:
149
+ idx_1, idx_2 = random.sample([0, 1, 2], 2)
150
+
151
+ idx_1 = self.tuples[idx][idx_1]
152
+ idx_2 = self.tuples[idx][idx_2]
153
+
154
+ path_image_1 = str(self.root_path / self.image_path / self.image_names[idx_1])
155
+ path_image_2 = str(self.root_path / self.image_path / self.image_names[idx_2])
156
+
157
+ return path_image_1, path_image_2
imcui/third_party/RIPE/ripe/data/datasets/tokyo247.py ADDED
@@ -0,0 +1,134 @@
1
+ import os
2
+ import random
3
+ from glob import glob
4
+ from typing import Any, Callable, Optional
5
+
6
+ import torch
7
+ from torch.utils.data import Dataset
8
+ from torchvision.io import read_image
9
+
10
+ from ripe import utils
11
+ from ripe.data.data_transforms import Compose
12
+
13
+ log = utils.get_pylogger(__name__)
14
+
15
+
16
+ class Tokyo247(Dataset):
17
+ def __init__(
18
+ self,
19
+ root: str,
20
+ stage: str = "train",
21
+ transforms: Optional[Callable] = None,
22
+ positive_only: bool = False,
23
+ ):
24
+ if stage != "train":
25
+ raise ValueError("Tokyo247Dataset only supports the 'train' stage.")
26
+
27
+ # check if the root directory exists
28
+ if not os.path.isdir(root):
29
+ raise FileNotFoundError(f"Directory {root} does not exist.")
30
+
31
+ self.root_dir = root
32
+ self.transforms = transforms if transforms is not None else Compose([])
33
+ self.positive_only = positive_only
34
+
35
+ self.image_paths = []
36
+ self.positive_pairs = []
37
+
38
+ # Collect images grouped by location folder
39
+ self.locations = {}
40
+ for location_rough in sorted(os.listdir(self.root_dir)):
41
+ location_rough_path = os.path.join(self.root_dir, location_rough)
42
+
43
+ # check if the location_rough_path is a directory
44
+ if not os.path.isdir(location_rough_path):
45
+ continue
46
+
47
+ for location_fine in sorted(os.listdir(location_rough_path)):
48
+ location_fine_path = os.path.join(self.root_dir, location_rough, location_fine)
49
+
50
+ if os.path.isdir(location_fine_path):
51
+ images = sorted(
52
+ glob(os.path.join(location_fine_path, "*.png")),
53
+ key=lambda i: int(i[-7:-4]),
54
+ )
55
+ if len(images) >= 12:
56
+ self.locations[location_fine] = images
57
+ self.image_paths.extend(images)
58
+
59
+ # Generate positive pairs
60
+ for _, images in self.locations.items():
61
+ for i in range(len(images) - 1):
62
+ self.positive_pairs.append((images[i], images[i + 1]))
63
+ self.positive_pairs.append((images[-1], images[0]))
64
+
65
+ if positive_only:
66
+ log.warning("Using only positive pairs!")
67
+
68
+ log.info(f"Found {len(self.positive_pairs)} image pairs.")
69
+
70
+ def __len__(self):
71
+ if self.positive_only:
72
+ return len(self.positive_pairs)
73
+ return 2 * len(self.positive_pairs)
74
+
75
+ def __getitem__(self, idx):
76
+ sample: Any = {}
77
+
78
+ positive_sample = (idx % 2 == 0) or (self.positive_only)
79
+ if not self.positive_only:
80
+ idx = idx // 2
81
+
82
+ sample["label"] = positive_sample
83
+
84
+ if positive_sample: # Positive pair
85
+ img1_path, img2_path = self.positive_pairs[idx]
86
+
87
+ assert os.path.dirname(img1_path) == os.path.dirname(img2_path), (
88
+ f"Source and target image mismatch: {img1_path} vs {img2_path}"
89
+ )
90
+
91
+ homography = torch.eye(3, dtype=torch.float32)
92
+ else: # Negative pair
93
+ img1_path = random.choice(self.image_paths)
94
+ img2_path = random.choice(self.image_paths)
95
+
96
+ # Ensure images are from different folders
97
+ esc = 0
98
+ while os.path.dirname(img1_path) == os.path.dirname(img2_path):
99
+ img2_path = random.choice(self.image_paths)
100
+
101
+ esc += 1
102
+ if esc > 100:
103
+ raise RuntimeError("Could not find a negative pair.")
104
+
105
+ assert os.path.dirname(img1_path) != os.path.dirname(img2_path), (
106
+ f"Source and target image match for negative pair: {img1_path} vs {img2_path}"
107
+ )
108
+
109
+ homography = torch.zeros((3, 3), dtype=torch.float32)
110
+
111
+ sample["src_path"] = img1_path
112
+ sample["trg_path"] = img2_path
113
+
114
+ # Load images
115
+ src_img = read_image(sample["src_path"]) / 255.0
116
+ trg_img = read_image(sample["trg_path"]) / 255.0
117
+
118
+ _, H_src, W_src = src_img.shape
119
+ _, H_trg, W_trg = trg_img.shape
120
+
121
+ src_mask = torch.ones((1, H_src, W_src), dtype=torch.uint8)
122
+ trg_mask = torch.ones((1, H_trg, W_trg), dtype=torch.uint8)
123
+
124
+ # Apply transformations
125
+ if self.transforms:
126
+ src_img, trg_img, src_mask, trg_mask, _ = self.transforms(src_img, trg_img, src_mask, trg_mask, homography)
127
+
128
+ sample["src_image"] = src_img
129
+ sample["trg_image"] = trg_img
130
+ sample["src_mask"] = src_mask.to(torch.bool)
131
+ sample["trg_mask"] = trg_mask.to(torch.bool)
132
+ sample["homography"] = homography
133
+
134
+ return sample
imcui/third_party/RIPE/ripe/losses/__init__.py ADDED
File without changes
imcui/third_party/RIPE/ripe/losses/contrastive_loss.py ADDED
@@ -0,0 +1,88 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ def second_nearest_neighbor(desc1, desc2):
7
+ if desc2.shape[0] < 2: # the SNN check needs at least 2 descriptors in desc2
8
+ raise ValueError("desc2 should have at least 2 descriptors")
9
+
10
+ dist = torch.cdist(desc1, desc2, p=2)
11
+
12
+ vals, idxs = torch.topk(dist, 2, dim=1, largest=False)
13
+ idxs_in_2 = idxs[:, 1]
14
+ idxs_in_1 = torch.arange(0, idxs_in_2.size(0), device=dist.device)
15
+
16
+ matches_idxs = torch.cat([idxs_in_1.view(-1, 1), idxs_in_2.view(-1, 1)], 1)
17
+
18
+ return vals[:, 1].view(-1, 1), matches_idxs
19
+
20
+
21
+ def contrastive_loss(
22
+ desc1,
23
+ desc2,
24
+ matches,
25
+ inliers,
26
+ label,
27
+ logits_1,
28
+ logits_2,
29
+ pos_margin=1.0,
30
+ neg_margin=1.0,
31
+ ):
32
+ if inliers.sum() < 8: # if there are too few inliers, calculate loss on all matches
33
+ inliers = torch.ones_like(inliers)
34
+
35
+ matched_inliers_descs1 = desc1[matches[:, 0][inliers]]
36
+ matched_inliers_descs2 = desc2[matches[:, 1][inliers]]
37
+
38
+ if logits_1 is not None and logits_2 is not None:
39
+ matched_inliers_logits1 = logits_1[matches[:, 0][inliers]]
40
+ matched_inliers_logits2 = logits_2[matches[:, 1][inliers]]
41
+ logits = torch.minimum(matched_inliers_logits1, matched_inliers_logits2)
42
+ else:
43
+ logits = torch.ones_like(matches[:, 0][inliers])
44
+
45
+ if label:
46
+ snn_match_dists_1, idx1 = second_nearest_neighbor(matched_inliers_descs1, desc2)
47
+ snn_match_dists_2, idx2 = second_nearest_neighbor(matched_inliers_descs2, desc1)
48
+
49
+ dists = torch.hstack((snn_match_dists_1, snn_match_dists_2))
50
+ min_dists_idx = torch.min(dists, dim=1).indices.unsqueeze(1)
51
+
52
+ dists_hard = torch.gather(dists, 1, min_dists_idx).squeeze(-1)
53
+ dists_pos = F.pairwise_distance(matched_inliers_descs1, matched_inliers_descs2)
54
+
55
+ contrastive_loss = torch.clamp(pos_margin + dists_pos - dists_hard, min=0.0)
56
+
57
+ contrastive_loss = contrastive_loss * logits
58
+
59
+ contrastive_loss = contrastive_loss.sum() / (logits.sum() + 1e-8) # small epsilon to avoid division by zero
60
+ else:
61
+ dists = F.pairwise_distance(matched_inliers_descs1, matched_inliers_descs2)
62
+ contrastive_loss = torch.clamp(neg_margin - dists, min=0.0)
63
+
64
+ contrastive_loss = contrastive_loss * logits
65
+
66
+ contrastive_loss = contrastive_loss.sum() / (logits.sum() + 1e-8) # small epsilon to avoid division by zero
67
+
68
+ return contrastive_loss
69
+
70
+
71
+ class ContrastiveLoss(nn.Module):
72
+ def __init__(self, pos_margin=1.0, neg_margin=1.0):
73
+ super().__init__()
74
+ self.pos_margin = pos_margin
75
+ self.neg_margin = neg_margin
76
+
77
+ def forward(self, desc1, desc2, matches, inliers, label, logits_1=None, logits_2=None):
78
+ return contrastive_loss(
79
+ desc1,
80
+ desc2,
81
+ matches,
82
+ inliers,
83
+ label,
84
+ logits_1,
85
+ logits_2,
86
+ self.pos_margin,
87
+ self.neg_margin,
88
+ )
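A synthetic sketch of calling this loss: the descriptors, match indices and inlier mask are random stand-ins for the matcher output, and the margins mirror `conf/descriptor_loss/contrastive_loss.yaml`.

```python
# Hedged sketch of ContrastiveLoss on synthetic data.
import torch

from ripe.losses.contrastive_loss import ContrastiveLoss

criterion = ContrastiveLoss(pos_margin=0.2, neg_margin=0.2)

desc1 = torch.nn.functional.normalize(torch.randn(100, 256), dim=1)
desc2 = torch.nn.functional.normalize(torch.randn(120, 256), dim=1)

matches = torch.stack(
    [torch.randint(0, 100, (32,)), torch.randint(0, 120, (32,))], dim=1
)  # (N, 2) absolute indices into desc1 / desc2
inliers = torch.rand(32) > 0.5  # boolean mask over the matches

loss_pos = criterion(desc1, desc2, matches, inliers, label=True)   # positive image pair
loss_neg = criterion(desc1, desc2, matches, inliers, label=False)  # negative image pair
print(loss_pos.item(), loss_neg.item())
```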
imcui/third_party/RIPE/ripe/matcher/__init__.py ADDED
File without changes
imcui/third_party/RIPE/ripe/matcher/concurrent_matcher.py ADDED
@@ -0,0 +1,97 @@
1
+ import concurrent.futures
2
+
3
+ import torch
4
+
5
+
6
+ class ConcurrentMatcher:
7
+ """A class that performs matching and geometric filtering in parallel using a thread pool executor.
8
+ It matches keypoints from two sets of descriptors and applies a robust estimator to filter the matches based on geometric constraints.
9
+
10
+ Args:
11
+ matcher (callable): A callable that takes two sets of descriptors and returns distances and indices of matches.
12
+ robust_estimator (callable): A callable that estimates a geometric transformation and returns inliers.
13
+ min_num_matches (int, optional): Minimum number of matches required to perform geometric filtering. Defaults to 8.
14
+ max_workers (int, optional): Maximum number of threads in the thread pool executor. Defaults to 12.
15
+ """
16
+
17
+ def __init__(self, matcher, robust_estimator, min_num_matches=8, max_workers=12):
18
+ self.matcher = matcher
19
+ self.robust_estimator = robust_estimator
20
+ self.min_num_matches = min_num_matches
21
+
22
+ self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)
23
+
24
+ @torch.no_grad()
25
+ def __call__(
26
+ self,
27
+ kpts1,
28
+ kpts2,
29
+ pdesc1,
30
+ pdesc2,
31
+ selected_mask1,
32
+ selected_mask2,
33
+ inl_th,
34
+ label=None,
35
+ ):
36
+ dev = pdesc1.device
37
+ B = pdesc1.shape[0]
38
+
39
+ batch_rel_idx_matches = [None] * B
40
+ batch_idx_matches = [None] * B
41
+ future_results = [None] * B
42
+
43
+ for b in range(B):
44
+ if selected_mask1[b].sum() < 16 or selected_mask2[b].sum() < 16:
45
+ continue
46
+
47
+ dists, idx_matches = self.matcher(pdesc1[b][selected_mask1[b]], pdesc2[b][selected_mask2[b]])
48
+
49
+ batch_rel_idx_matches[b] = idx_matches.clone()
50
+
51
+ # calculate ABSOLUTE indexes
52
+ idx_matches[:, 0] = torch.nonzero(selected_mask1[b], as_tuple=False)[idx_matches[:, 0]].squeeze()
53
+ idx_matches[:, 1] = torch.nonzero(selected_mask2[b], as_tuple=False)[idx_matches[:, 1]].squeeze()
54
+
55
+ batch_idx_matches[b] = idx_matches
56
+
57
+ # if not enough matches
58
+ if idx_matches.shape[0] < self.min_num_matches:
59
+ ransac_inliers = torch.zeros((idx_matches.shape[0]), device=dev).bool()
60
+ future_results[b] = (None, ransac_inliers)
61
+ continue
62
+
63
+ # use label information to exclude negative pairs from geometric filtering process -> enforces more descriminative descriptors
64
+ if label is not None and label[b] == 0:
65
+ ransac_inliers = torch.ones((idx_matches.shape[0]), device=dev).bool()
66
+ future_results[b] = (None, ransac_inliers)
67
+ continue
68
+
69
+ mkpts1 = kpts1[b][idx_matches[:, 0]]
70
+ mkpts2 = kpts2[b][idx_matches[:, 1]]
71
+
72
+ future_results[b] = self.executor.submit(self.robust_estimator, mkpts1, mkpts2, inl_th)
73
+
74
+ batch_ransac_inliers = [None] * B
75
+ batch_Fm = [None] * B
76
+
77
+ for b in range(B):
78
+ future_result = future_results[b]
79
+ if future_result is None:
80
+ ransac_inliers = None
81
+ Fm = None
82
+ elif isinstance(future_result, tuple):
83
+ Fm, ransac_inliers = future_result
84
+ else:
85
+ Fm, ransac_inliers = future_result.result()
86
+
87
+ # if no inliers
88
+ if ransac_inliers.sum() == 0:
89
+ ransac_inliers = ransac_inliers.squeeze(
90
+ -1
91
+ ) # kornia.geometry.ransac.RANSAC returns (N, 1) tensor if no inliers and (N,) tensor if inliers
92
+ Fm = None
93
+
94
+ batch_ransac_inliers[b] = ransac_inliers
95
+ batch_Fm[b] = Fm
96
+
97
+ return batch_rel_idx_matches, batch_idx_matches, batch_ransac_inliers, batch_Fm
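The following hedged sketch wires this class up as `conf/matcher/concurrent_mnn_poselib.yaml` describes, but with random keypoints and descriptors instead of detector output; it assumes `kornia` and `poselib` are installed. With random inputs the robust estimator will usually find no consistent geometry, which also exercises the empty-inlier path above.

```python
# Sketch: ConcurrentMatcher with kornia MNN matching and the poselib estimator.
import kornia.feature as KF
import torch

from ripe.matcher.concurrent_matcher import ConcurrentMatcher
from ripe.matcher.pose_estimator_poselib import PoseLibRelativePoseEstimator

matcher = ConcurrentMatcher(
    matcher=KF.DescriptorMatcher(match_mode="mnn", th=0.8),  # th is unused in mnn mode
    robust_estimator=PoseLibRelativePoseEstimator(),
    min_num_matches=8,
)

B, N, D = 2, 512, 256
kpts1 = torch.rand(B, N, 2) * 640                   # random keypoint locations in pixels
kpts2 = torch.rand(B, N, 2) * 640
desc1 = torch.nn.functional.normalize(torch.randn(B, N, D), dim=-1)
desc2 = torch.nn.functional.normalize(torch.randn(B, N, D), dim=-1)
mask1 = torch.ones(B, N, dtype=torch.bool)          # all keypoints selected
mask2 = torch.ones(B, N, dtype=torch.bool)

rel_idx, abs_idx, inliers, Fms = matcher(kpts1, kpts2, desc1, desc2, mask1, mask2, inl_th=1.0)
print([0 if inl is None else int(inl.sum()) for inl in inliers])  # inlier counts per batch element
```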
imcui/third_party/RIPE/ripe/matcher/pose_estimator_poselib.py ADDED
@@ -0,0 +1,31 @@
1
+ import poselib
2
+ import torch
3
+
4
+
5
+ class PoseLibRelativePoseEstimator:
6
+ """PoseLibRelativePoseEstimator estimates the fundamental matrix using poselib library.
7
+ It uses the poselib's estimate_fundamental function to compute the fundamental matrix and inliers based on the provided points.
8
+ Args:
9
+ None
10
+ """
11
+
12
+ def __init__(self):
13
+ pass
14
+
15
+ def __call__(self, pts0, pts1, inl_th):
16
+ F, info = poselib.estimate_fundamental(
17
+ pts0.cpu().numpy(),
18
+ pts1.cpu().numpy(),
19
+ {
20
+ "max_epipolar_error": inl_th,
21
+ },
22
+ )
23
+
24
+ success = F is not None
25
+ if success:
26
+ inliers = info.pop("inliers")
27
+ inliers = torch.tensor(inliers, dtype=torch.bool, device=pts0.device)
28
+ else:
29
+ inliers = torch.zeros(pts0.shape[0], dtype=torch.bool, device=pts0.device)
30
+
31
+ return F, inliers
imcui/third_party/RIPE/ripe/model_zoo/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .vgg_hyper import vgg_hyper # noqa: F401
imcui/third_party/RIPE/ripe/model_zoo/vgg_hyper.py ADDED
@@ -0,0 +1,39 @@
1
+ from pathlib import Path
2
+
3
+ import torch
4
+
5
+ from ripe.models.backbones.vgg import VGG
6
+ from ripe.models.ripe import RIPE
7
+ from ripe.models.upsampler.hypercolumn_features import HyperColumnFeatures
8
+
9
+
10
+ def vgg_hyper(model_path: Path = None, desc_shares=None):
11
+ if model_path is None:
12
+ # check if the weights file exists in the current directory
13
+ model_path = Path("/tmp/ripe_weights.pth")
14
+
15
+ if model_path.exists():
16
+ print(f"Using existing weights from {model_path}")
17
+ else:
18
+ print("Weights file not found. Downloading ...")
19
+ torch.hub.download_url_to_file(
20
+ "https://cvg.hhi.fraunhofer.de/RIPE/ripe_weights.pth",
21
+ "/tmp/ripe_weights.pth",
22
+ )
23
+ else:
24
+ if not model_path.exists():
25
+ print(f"Error: {model_path} does not exist.")
26
+ raise FileNotFoundError(f"Error: {model_path} does not exist.")
27
+
28
+ backbone = VGG(pretrained=False)
29
+ upsampler = HyperColumnFeatures()
30
+
31
+ extractor = RIPE(
32
+ net=backbone,
33
+ upsampler=upsampler,
34
+ desc_shares=desc_shares,
35
+ )
36
+
37
+ extractor.load_state_dict(torch.load(model_path, map_location="cpu"))
38
+
39
+ return extractor
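Putting the factory together with the `detectAndCompute` method defined in `ripe/models/ripe.py` further down gives the usual extraction loop. A sketch, assuming the RIPE sources are importable; note that a purely random tensor may hit the "No keypoints detected" guard, so use a real image in practice.

```python
import torch
from ripe.model_zoo import vgg_hyper

model = vgg_hyper()              # downloads ripe_weights.pth to /tmp on first use
model.eval()

image = torch.rand(3, 480, 640)  # stand-in for a real RGB image in [0, 1]
kpts, descs, scores = model.detectAndCompute(image, threshold=0.5, top_k=2048)

print(kpts.shape, descs.shape, scores.shape)  # (N, 2), (N, 256), (N,)
```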
imcui/third_party/RIPE/ripe/models/__init__.py ADDED
File without changes
imcui/third_party/RIPE/ripe/models/backbones/__init__.py ADDED
File without changes
imcui/third_party/RIPE/ripe/models/backbones/backbone_base.py ADDED
@@ -0,0 +1,61 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
+ class BackboneBase(nn.Module):
6
+ """Base class for backbone networks. Provides a standard interface for preprocessing inputs and
7
+ defining encoder dimensions.
8
+
9
+ Args:
10
+ nchannels (int): Number of input channels.
11
+ use_instance_norm (bool): Whether to apply instance normalization.
12
+ """
13
+
14
+ def __init__(self, nchannels=3, use_instance_norm=False):
15
+ super().__init__()
16
+ assert nchannels > 0, "Number of channels must be positive."
17
+ self.nchannels = nchannels
18
+ self.use_instance_norm = use_instance_norm
19
+ self.norm = nn.InstanceNorm2d(nchannels) if use_instance_norm else None
20
+
21
+ def get_dim_layers_encoder(self):
22
+ """Get dimensions of encoder layers."""
23
+ raise NotImplementedError("Subclasses must implement this method.")
24
+
25
+ def _forward(self, x):
26
+ """Define the forward pass for the backbone."""
27
+ raise NotImplementedError("Subclasses must implement this method.")
28
+
29
+ def forward(self, x: torch.Tensor, preprocess=True):
30
+ """Forward pass with optional preprocessing.
31
+
32
+ Args:
33
+ x (Tensor): Input tensor.
34
+ preprocess (bool): Whether to apply channel reduction.
35
+ """
36
+ if preprocess:
37
+ if x.dim() != 4:
38
+ if x.dim() == 2 and x.shape[0] > 3 and x.shape[1] > 3:
39
+ x = x.unsqueeze(0).unsqueeze(0)
40
+ elif x.dim() == 3:
41
+ x = x.unsqueeze(0)
42
+ else:
43
+ raise ValueError(f"Unexpected input shape: {x.shape}")
44
+
45
+ if self.nchannels == 1 and x.shape[1] != 1:
46
+ if len(x.shape) == 4: # Assumes (batch, channel, height, width)
47
+ x = torch.mean(x, axis=1, keepdim=True)
48
+ else:
49
+ raise ValueError(f"Unexpected input shape: {x.shape}")
50
+
51
+ # replicate a single-channel input to three channels if the backbone expects RGB
52
+ if self.nchannels == 3 and x.shape[1] == 1:
53
+ if len(x.shape) == 4:
54
+ x = x.repeat(1, 3, 1, 1)
55
+ else:
56
+ raise ValueError(f"Unexpected input shape: {x.shape}")
57
+
58
+ if self.use_instance_norm:
59
+ x = self.norm(x)
60
+
61
+ return self._forward(x)
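The preprocessing in `forward` normalises arbitrary input shapes before dispatching to `_forward`. A minimal sketch with a trivial, purely illustrative subclass shows how a raw `(H, W)` grayscale tensor is promoted to a batched 3-channel input.

```python
import torch
from ripe.models.backbones.backbone_base import BackboneBase

class IdentityBackbone(BackboneBase):
    """Illustrative subclass: returns the preprocessed tensor unchanged."""

    def get_dim_layers_encoder(self):
        return [self.nchannels]

    def _forward(self, x):
        return x

net = IdentityBackbone(nchannels=3, use_instance_norm=False)

gray = torch.rand(480, 640)       # (H, W) grayscale
print(net(gray).shape)            # torch.Size([1, 3, 480, 640]): unsqueezed and channel-replicated

rgb = torch.rand(2, 3, 480, 640)  # already (B, 3, H, W): passed through untouched
print(net(rgb).shape)             # torch.Size([2, 3, 480, 640])
```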
imcui/third_party/RIPE/ripe/models/backbones/vgg.py ADDED
@@ -0,0 +1,99 @@
1
+ # adapted from: https://github.com/Parskatt/DeDoDe/blob/main/DeDoDe/encoder.py and https://github.com/Parskatt/DeDoDe/blob/main/DeDoDe/decoder.py
2
+
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from .backbone_base import BackboneBase
7
+ from .vgg_utils import VGG19, ConvRefiner, Decoder
8
+
9
+
10
+ class VGG(BackboneBase):
11
+ def __init__(self, nchannels=3, pretrained=True, use_instance_norm=True, mode="dect"):
12
+ super().__init__(nchannels=nchannels, use_instance_norm=use_instance_norm)
13
+
14
+ self.nchannels = nchannels
15
+ self.mode = mode
16
+
17
+ if self.mode not in ["dect", "desc", "dect+desc"]:
18
+ raise ValueError("mode should be 'dect', 'desc' or 'dect+desc'")
19
+
20
+ NUM_OUTPUT_CHANNELS, hidden_blocks = self._get_mode_params(mode)
21
+ conv_refiner = self._create_conv_refiner(NUM_OUTPUT_CHANNELS, hidden_blocks)
22
+
23
+ self.encoder = VGG19(pretrained=pretrained, num_input_channels=nchannels)
24
+ self.decoder = Decoder(conv_refiner, num_prototypes=NUM_OUTPUT_CHANNELS)
25
+
26
+ def _get_mode_params(self, mode):
27
+ """Get the number of output channels and the number of hidden blocks for the ConvRefiner.
28
+
29
+ Depending on the mode, the ConvRefiner will have a different number of output channels.
30
+ """
31
+
32
+ if mode == "dect":
33
+ return 1, 8
34
+ elif mode == "desc":
35
+ return 256, 5
36
+ elif mode == "dect+desc":
37
+ return 256 + 1, 8
38
+
39
+ def _create_conv_refiner(self, num_output_channels, hidden_blocks):
40
+ return nn.ModuleDict(
41
+ {
42
+ "8": ConvRefiner(
43
+ 512,
44
+ 512,
45
+ 256 + num_output_channels,
46
+ hidden_blocks=hidden_blocks,
47
+ residual=True,
48
+ ),
49
+ "4": ConvRefiner(
50
+ 256 + 256,
51
+ 256,
52
+ 128 + num_output_channels,
53
+ hidden_blocks=hidden_blocks,
54
+ residual=True,
55
+ ),
56
+ "2": ConvRefiner(
57
+ 128 + 128,
58
+ 128,
59
+ 64 + num_output_channels,
60
+ hidden_blocks=hidden_blocks,
61
+ residual=True,
62
+ ),
63
+ "1": ConvRefiner(
64
+ 64 + 64,
65
+ 64,
66
+ 1 + num_output_channels,
67
+ hidden_blocks=hidden_blocks,
68
+ residual=True,
69
+ ),
70
+ }
71
+ )
72
+
73
+ def get_dim_layers_encoder(self):
74
+ return self.encoder.get_dim_layers()
75
+
76
+ def _forward(self, x):
77
+ features, sizes = self.encoder(x)
78
+ output = 0
79
+ context = None
80
+ scales = self.decoder.scales
81
+ for idx, (feature_map, scale) in enumerate(zip(reversed(features), scales)):
82
+ delta_descriptor, context = self.decoder(feature_map, scale=scale, context=context)
83
+ output = output + delta_descriptor
84
+ if idx < len(scales) - 1:
85
+ size = sizes[-(idx + 2)]
86
+ output = F.interpolate(output, size=size, mode="bilinear", align_corners=False)
87
+ context = F.interpolate(context, size=size, mode="bilinear", align_corners=False)
88
+
89
+ if self.mode == "dect":
90
+ return {"heatmap": output, "coarse_descs": features}
91
+ elif self.mode == "desc":
92
+ return {"fine_descs": output, "coarse_descs": features}
93
+ elif self.mode == "dect+desc":
94
+ logits = output[:, :1].contiguous()
95
+ descs = output[:, 1:].contiguous()
96
+
97
+ return {"heatmap": logits, "fine_descs": descs, "coarse_descs": features}
98
+ else:
99
+ raise ValueError("mode should be 'dect', 'desc' or 'dect+desc'")
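The `mode` argument only changes how many channels the decoder predicts and how its output is split. A sketch that prints the resulting dictionary for each mode, assuming the RIPE sources are importable; shapes are for a 64x64 input.

```python
import torch
from ripe.models.backbones.vgg import VGG

x = torch.rand(1, 3, 64, 64)

for mode in ["dect", "desc", "dect+desc"]:
    net = VGG(pretrained=False, mode=mode).eval()
    with torch.no_grad():
        out = net(x)
    print(mode, {k: (tuple(v.shape) if torch.is_tensor(v) else f"{len(v)} maps") for k, v in out.items()})

# dect       -> heatmap (1, 1, 64, 64) plus 4 coarse feature maps
# desc       -> fine_descs (1, 256, 64, 64) plus 4 coarse feature maps
# dect+desc  -> heatmap (1, 1, 64, 64) and fine_descs (1, 256, 64, 64) plus 4 coarse feature maps
```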
imcui/third_party/RIPE/ripe/models/backbones/vgg_utils.py ADDED
@@ -0,0 +1,143 @@
1
+ # adapted from: https://github.com/Parskatt/DeDoDe/blob/main/DeDoDe/encoder.py and https://github.com/Parskatt/DeDoDe/blob/main/DeDoDe/decoder.py
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torchvision.models as tvm
6
+
7
+ from ripe import utils
8
+
9
+ log = utils.get_pylogger(__name__)
10
+
11
+
12
+ class Decoder(nn.Module):
13
+ def __init__(self, layers, *args, super_resolution=False, num_prototypes=1, **kwargs) -> None:
14
+ super().__init__(*args, **kwargs)
15
+ self.layers = layers
16
+ self.scales = self.layers.keys()
17
+ self.super_resolution = super_resolution
18
+ self.num_prototypes = num_prototypes
19
+
20
+ def forward(self, features, context=None, scale=None):
21
+ if context is not None:
22
+ features = torch.cat((features, context), dim=1)
23
+ stuff = self.layers[scale](features)
24
+ logits, context = (
25
+ stuff[:, : self.num_prototypes],
26
+ stuff[:, self.num_prototypes :],
27
+ )
28
+ return logits, context
29
+
30
+
31
+ class ConvRefiner(nn.Module):
32
+ def __init__(
33
+ self,
34
+ in_dim=6,
35
+ hidden_dim=16,
36
+ out_dim=2,
37
+ dw=True,
38
+ kernel_size=5,
39
+ hidden_blocks=5,
40
+ residual=False,
41
+ ):
42
+ super().__init__()
43
+ self.block1 = self.create_block(
44
+ in_dim,
45
+ hidden_dim,
46
+ dw=False,
47
+ kernel_size=1,
48
+ )
49
+ self.hidden_blocks = nn.Sequential(
50
+ *[
51
+ self.create_block(
52
+ hidden_dim,
53
+ hidden_dim,
54
+ dw=dw,
55
+ kernel_size=kernel_size,
56
+ )
57
+ for hb in range(hidden_blocks)
58
+ ]
59
+ )
60
+ self.hidden_blocks = self.hidden_blocks
61
+ self.out_conv = nn.Conv2d(hidden_dim, out_dim, 1, 1, 0)
62
+ self.residual = residual
63
+
64
+ def create_block(
65
+ self,
66
+ in_dim,
67
+ out_dim,
68
+ dw=True,
69
+ kernel_size=5,
70
+ bias=True,
71
+ norm_type=nn.BatchNorm2d,
72
+ ):
73
+ num_groups = 1 if not dw else in_dim
74
+ if dw:
75
+ assert out_dim % in_dim == 0, "outdim must be divisible by indim for depthwise"
76
+ conv1 = nn.Conv2d(
77
+ in_dim,
78
+ out_dim,
79
+ kernel_size=kernel_size,
80
+ stride=1,
81
+ padding=kernel_size // 2,
82
+ groups=num_groups,
83
+ bias=bias,
84
+ )
85
+ norm = norm_type(out_dim) if norm_type is nn.BatchNorm2d else norm_type(num_channels=out_dim)
86
+ relu = nn.ReLU(inplace=True)
87
+ conv2 = nn.Conv2d(out_dim, out_dim, 1, 1, 0)
88
+ return nn.Sequential(conv1, norm, relu, conv2)
89
+
90
+ def forward(self, feats):
91
+ b, c, hs, ws = feats.shape
92
+ x0 = self.block1(feats)
93
+ x = self.hidden_blocks(x0)
94
+ if self.residual:
95
+ x = (x + x0) / 1.4
96
+ x = self.out_conv(x)
97
+ return x
98
+
99
+
100
+ class VGG19(nn.Module):
101
+ def __init__(self, pretrained=False, num_input_channels=3) -> None:
102
+ super().__init__()
103
+ self.layers = nn.ModuleList(tvm.vgg19_bn(pretrained=pretrained).features[:40])
104
+ # Maxpool layers: 6, 13, 26, 39
105
+
106
+ if num_input_channels != 3:
107
+ log.info(f"Changing input channels from 3 to {num_input_channels}")
108
+ self.layers[0] = nn.Conv2d(num_input_channels, 64, 3, 1, 1)
109
+
110
+ def get_dim_layers(self):
111
+ return [64, 128, 256, 512]
112
+
113
+ def forward(self, x, **kwargs):
114
+ feats = []
115
+ sizes = []
116
+ for layer in self.layers:
117
+ if isinstance(layer, nn.MaxPool2d):
118
+ feats.append(x)
119
+ sizes.append(x.shape[-2:])
120
+ x = layer(x)
121
+ return feats, sizes
122
+
123
+
124
+ class VGG(nn.Module):
125
+ def __init__(self, size="19", pretrained=False) -> None:
126
+ super().__init__()
127
+ if size == "11":
128
+ self.layers = nn.ModuleList(tvm.vgg11_bn(pretrained=pretrained).features[:22])
129
+ elif size == "13":
130
+ self.layers = nn.ModuleList(tvm.vgg13_bn(pretrained=pretrained).features[:28])
131
+ elif size == "19":
132
+ self.layers = nn.ModuleList(tvm.vgg19_bn(pretrained=pretrained).features[:40])
133
+ # Maxpool layers: 6, 13, 26, 39
134
+
135
+ def forward(self, x, **kwargs):
136
+ feats = []
137
+ sizes = []
138
+ for layer in self.layers:
139
+ if isinstance(layer, nn.MaxPool2d):
140
+ feats.append(x)
141
+ sizes.append(x.shape[-2:])
142
+ x = layer(x)
143
+ return feats, sizes
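`VGG19` taps the torchvision features right before each max-pool, so the encoder returns a four-level pyramid whose channel widths match `get_dim_layers()`. A small shape-check sketch, assuming the RIPE sources are importable:

```python
import torch
from ripe.models.backbones.vgg_utils import VGG19

enc = VGG19(pretrained=False)
feats, sizes = enc(torch.rand(1, 3, 64, 64))

for f, s in zip(feats, sizes):
    print(tuple(f.shape), tuple(s))
# (1, 64, 64, 64)   (64, 64)   stride 1
# (1, 128, 32, 32)  (32, 32)   stride 2
# (1, 256, 16, 16)  (16, 16)   stride 4
# (1, 512, 8, 8)    (8, 8)     stride 8
```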
imcui/third_party/RIPE/ripe/models/ripe.py ADDED
@@ -0,0 +1,303 @@
1
+ from typing import List, Optional
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+
8
+ from ripe import utils
9
+ from ripe.utils.utils import gridify
10
+
11
+ log = utils.get_pylogger(__name__)
12
+
13
+
14
+ class KeypointSampler(nn.Module):
15
+ """
16
+ Sample keypoints according to a Heatmap
17
+ Adapted from: https://github.com/verlab/DALF_CVPR_2023/blob/main/modules/models/DALF.py
18
+ """
19
+
20
+ def __init__(self, window_size=8):
21
+ super().__init__()
22
+ self.window_size = window_size
23
+ self.idx_cells = None # Cache for meshgrid indices
24
+
25
+ def sample(self, grid):
26
+ """
27
+ Sample keypoints given a grid where each cell has logits stacked in last dimension
28
+ Input
29
+ grid: [B, C, H//w, W//w, w*w]
30
+
31
+ Returns
32
+ log_probs: [B, C, H//w, W//w ] - logprobs of selected samples
33
+ choices: [B, C, H//w, W//w] indices of choices
34
+ accept_mask: [B, C, H//w, W//w] mask of accepted keypoints
35
+
36
+ """
37
+ chooser = torch.distributions.Categorical(logits=grid)
38
+ choices = chooser.sample()
39
+ logits_selected = torch.gather(grid, -1, choices.unsqueeze(-1)).squeeze(-1)
40
+
41
+ flipper = torch.distributions.Bernoulli(logits=logits_selected)
42
+ accepted_choices = flipper.sample()
43
+
44
+ # Summing log-probabilities is equivalent to multiplying the probabilities
45
+ log_probs = chooser.log_prob(choices) + flipper.log_prob(accepted_choices)
46
+
47
+ accept_mask = accepted_choices.gt(0)
48
+
49
+ return (
50
+ log_probs.squeeze(1),
51
+ choices,
52
+ accept_mask.squeeze(1),
53
+ logits_selected.squeeze(1),
54
+ )
55
+
56
+ def precompute_idx_cells(self, H, W, device):
57
+ idx_cells = gridify(
58
+ torch.dstack(
59
+ torch.meshgrid(
60
+ torch.arange(H, dtype=torch.float32, device=device),
61
+ torch.arange(W, dtype=torch.float32, device=device),
62
+ )
63
+ )
64
+ .permute(2, 0, 1)
65
+ .unsqueeze(0)
66
+ .expand(1, -1, -1, -1),
67
+ window_size=self.window_size,
68
+ )
69
+
70
+ return idx_cells
71
+
72
+ def forward(self, x, mask_padding=None):
73
+ """
74
+ Sample keypoints from a heatmap
75
+ Input
76
+ x: [B, C, H, W] Heatmap
77
+ mask_padding: [B, 1, H, W] Mask for padding (optional)
78
+ Returns
79
+ keypoints: [B, H//w, W//w, 2] Keypoints in (x, y) format
80
+ log_probs: [B, H//w, W//w] Log probabilities of selected keypoints
81
+ mask: [B, H//w, W//w] Mask of accepted keypoints
82
+ mask_padding: [B, 1, H//w, W//w] Mask of padding (optional)
83
+ logits_selected: [B, H//w, W//w] Logits of selected keypoints
84
+ """
85
+
86
+ B, C, H, W = x.shape
87
+
88
+ keypoint_cells = gridify(x, self.window_size)
89
+
90
+ mask_padding = (
91
+ (torch.min(gridify(mask_padding, self.window_size), dim=4).values) if mask_padding is not None else None
92
+ )
93
+
94
+ if self.idx_cells is None or self.idx_cells.shape[2:4] != (
95
+ H // self.window_size,
96
+ W // self.window_size,
97
+ ):
98
+ self.idx_cells = self.precompute_idx_cells(H, W, x.device)
99
+
100
+ log_probs, idx, mask, logits_selected = self.sample(keypoint_cells)
101
+
102
+ keypoints = (
103
+ torch.gather(
104
+ self.idx_cells.expand(B, -1, -1, -1, -1),
105
+ -1,
106
+ idx.repeat(1, 2, 1, 1).unsqueeze(-1),
107
+ )
108
+ .squeeze(-1)
109
+ .permute(0, 2, 3, 1)
110
+ )
111
+
112
+ # flip keypoints to (x, y) format
113
+ return keypoints.flip(-1), log_probs, mask, mask_padding, logits_selected
114
+
115
+
116
+ class RIPE(nn.Module):
117
+ """
118
+ Base class for extracting keypoints and descriptors
119
+ Input
120
+ x: [B, C, H, W] Images
121
+
122
+ Returns
123
+ kpts:
124
+ list of size [B] with detected keypoints
125
+ descs:
126
+ list of size [B] with descriptors
127
+ """
128
+
129
+ def __init__(
130
+ self,
131
+ net,
132
+ upsampler,
133
+ window_size: int = 8,
134
+ non_linearity_dect=None,
135
+ desc_shares: Optional[List[int]] = None,
136
+ descriptor_dim: int = 256,
137
+ device=None,
138
+ ):
139
+ super().__init__()
140
+ self.net = net
141
+
142
+ self.detector = KeypointSampler(window_size)
143
+ self.upsampler = upsampler
144
+ self.sampler = None
145
+ self.window_size = window_size
146
+ self.non_linearity_dect = non_linearity_dect if non_linearity_dect is not None else nn.Identity()
147
+
148
+ log.info(f"Training with window size {window_size}.")
149
+ log.info(f"Use {non_linearity_dect} as final non-linearity before the detection heatmap.")
150
+
151
+ dim_coarse_desc = self.get_dim_raw_desc()
152
+
153
+ if desc_shares is not None:
154
+ assert upsampler.name == "HyperColumnFeatures", (
155
+ "Individual descriptor convolutions are only supported with HyperColumnFeatures"
156
+ )
157
+ assert len(desc_shares) == 4, "desc_shares should have 4 elements"
158
+ assert sum(desc_shares) == descriptor_dim, f"sum of desc_shares should be {descriptor_dim}"
159
+
160
+ self.conv_dim_reduction_coarse_desc = nn.ModuleList()
161
+
162
+ for dim_in, dim_out in zip(dim_coarse_desc, desc_shares):
163
+ log.info(f"Training dim reduction descriptor with {dim_in} -> {dim_out} 1x1 conv")
164
+ self.conv_dim_reduction_coarse_desc.append(
165
+ nn.Conv1d(dim_in, dim_out, kernel_size=1, stride=1, padding=0)
166
+ )
167
+ else:
168
+ if descriptor_dim is not None:
169
+ log.info(f"Training dim reduction descriptor with {sum(dim_coarse_desc)} -> {descriptor_dim} 1x1 conv")
170
+ self.conv_dim_reduction_coarse_desc = nn.Conv1d(
171
+ sum(dim_coarse_desc),
172
+ descriptor_dim,
173
+ kernel_size=1,
174
+ stride=1,
175
+ padding=0,
176
+ )
177
+ else:
178
+ log.warning(
179
+ f"No descriptor dimension specified, no 1x1 conv will be applied! Direct usage of {sum(dim_coarse_desc)}-dimensional raw descriptor"
180
+ )
181
+ self.conv_dim_reduction_coarse_desc = nn.Identity()
182
+
183
+ def get_dim_raw_desc(self):
184
+ layers_dims_encoder = self.net.get_dim_layers_encoder()
185
+
186
+ if self.upsampler.name == "InterpolateSparse2d":
187
+ return [layers_dims_encoder[-1]]
188
+ elif self.upsampler.name == "HyperColumnFeatures":
189
+ return layers_dims_encoder
190
+ else:
191
+ raise ValueError(f"Unknown interpolator {self.upsampler.name}")
192
+
193
+ @torch.inference_mode()
194
+ def detectAndCompute(self, img, threshold=0.5, top_k=2048, output_aux=False):
195
+ self.train(False)
196
+
197
+ if img.dim() == 3:
198
+ img = img.unsqueeze(0)
199
+
200
+ out = self(img, training=False)
201
+ B, K, H, W = out["heatmap"].shape
202
+
203
+ assert B == 1, "Batch size should be 1"
204
+
205
+ kpts = [{"xy": self.NMS(out["heatmap"][b], threshold)} for b in range(B)]
206
+
207
+ if top_k is not None:
208
+ for b in range(B):
209
+ scores = out["heatmap"][b].squeeze(0)[kpts[b]["xy"][:, 1].long(), kpts[b]["xy"][:, 0].long()]
210
+ sorted_idx = torch.argsort(-scores)
211
+ kpts[b]["xy"] = kpts[b]["xy"][sorted_idx[:top_k]]
212
+ if "logprobs" in kpts[b]:
213
+ kpts[b]["logprobs"] = kpts[b]["logprobs"][sorted_idx[:top_k]]
214
+
215
+ if kpts[0]["xy"].shape[0] == 0:
216
+ raise RuntimeError("No keypoints detected")
217
+
218
+ # the following works for batch size 1 only
219
+
220
+ descs = self.get_descs(out["coarse_descs"], img, kpts[0]["xy"].unsqueeze(0), H, W)
221
+ descs = descs.squeeze(0)
222
+
223
+ score_map = out["heatmap"][0].squeeze(0)
224
+
225
+ kpts = kpts[0]["xy"]
226
+
227
+ scores = score_map[kpts[:, 1], kpts[:, 0]]
228
+ scores /= score_map.max()
229
+
230
+ sort_idx = torch.argsort(-scores)
231
+ kpts, descs, scores = kpts[sort_idx], descs[sort_idx], scores[sort_idx]
232
+
233
+ if output_aux:
234
+ return (
235
+ kpts.float(),
236
+ descs,
237
+ scores,
238
+ {
239
+ "heatmap": out["heatmap"],
240
+ "descs": out["coarse_descs"],
241
+ "conv": self.conv_dim_reduction_coarse_desc,
242
+ },
243
+ )
244
+
245
+ return kpts.float(), descs, scores
246
+
247
+ def NMS(self, x, threshold=3.0, kernel_size=3):
248
+ pad = kernel_size // 2
249
+ local_max = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=pad)(x)
250
+
251
+ pos = (x == local_max) & (x > threshold)
252
+ return pos.nonzero()[..., 1:].flip(-1)
253
+
254
+ def get_descs(self, feature_map, guidance, kpts, H, W):
255
+ descs = self.upsampler(feature_map, kpts, H, W)
256
+
257
+ if isinstance(self.conv_dim_reduction_coarse_desc, nn.ModuleList):
258
+ # individual descriptor convolutions for each layer
259
+ desc_conv = []
260
+ for desc, conv in zip(descs, self.conv_dim_reduction_coarse_desc):
261
+ desc_conv.append(conv(desc.permute(0, 2, 1)).permute(0, 2, 1))
262
+ desc = torch.cat(desc_conv, dim=-1)
263
+ else:
264
+ desc = torch.cat(descs, dim=-1)
265
+ desc = self.conv_dim_reduction_coarse_desc(desc.permute(0, 2, 1)).permute(0, 2, 1)
266
+
267
+ desc = F.normalize(desc, dim=2)
268
+
269
+ return desc
270
+
271
+ def forward(self, x, mask_padding=None, training=False):
272
+ B, C, H, W = x.shape
273
+ out = self.net(x)
274
+ out["heatmap"] = self.non_linearity_dect(out["heatmap"])
275
+ # print(out['map'].shape, out['descr'].shape)
276
+ if training:
277
+ kpts, log_probs, mask, mask_padding, logits_selected = self.detector(out["heatmap"], mask_padding)
278
+
279
+ filter_A = kpts[:, :, :, 0] >= 16
280
+ filter_B = kpts[:, :, :, 1] >= 16
281
+ filter_C = kpts[:, :, :, 0] < W - 16
282
+ filter_D = kpts[:, :, :, 1] < H - 16
283
+ filter_all = filter_A * filter_B * filter_C * filter_D
284
+
285
+ mask = mask * filter_all
286
+
287
+ return (
288
+ kpts.view(B, -1, 2),
289
+ log_probs.view(B, -1),
290
+ mask.view(B, -1),
291
+ mask_padding.view(B, -1),
292
+ logits_selected.view(B, -1),
293
+ out,
294
+ )
295
+ else:
296
+ return out
297
+
298
+
299
+ def output_number_trainable_params(model):
300
+ model_parameters = filter(lambda p: p.requires_grad, model.parameters())
301
+ nb_params = sum([np.prod(p.size()) for p in model_parameters])
302
+
303
+ print(f"Number of trainable parameters: {nb_params:d}")
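For reference, the two-stage draw inside `KeypointSampler.sample` (a `Categorical` picks one position per window, a `Bernoulli` on the selected logit accepts or rejects it, and the two log-probabilities are summed) can be reproduced standalone on dummy logits; the names and shapes below are illustrative only.

```python
import torch

torch.manual_seed(0)
w = 8
grid = torch.randn(1, 1, 4, 4, w * w)  # [B, C, H//w, W//w, w*w] logits per cell

chooser = torch.distributions.Categorical(logits=grid)
choices = chooser.sample()              # index of the chosen pixel within each cell
logits_selected = torch.gather(grid, -1, choices.unsqueeze(-1)).squeeze(-1)

flipper = torch.distributions.Bernoulli(logits=logits_selected)
accepted = flipper.sample()             # 1 = keep the cell's keypoint, 0 = reject it

# summing the log-probabilities of both decisions = multiplying the probabilities
log_probs = chooser.log_prob(choices) + flipper.log_prob(accepted)
print(choices.shape, int(accepted.sum()), log_probs.shape)
```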