yushihu committed
Commit 6bb1ad5 · verified · 1 parent: c953528

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. LICENSE +201 -0
  2. README.md +278 -8
  3. app.py +95 -0
  4. controlnet/README.md +15 -0
  5. controlnet/config.json +51 -0
  6. depth_anything/__pycache__/blocks.cpython-39.pyc +0 -0
  7. depth_anything/__pycache__/dpt.cpython-39.pyc +0 -0
  8. depth_anything/blocks.py +153 -0
  9. depth_anything/dpt.py +187 -0
  10. depth_anything/util/__pycache__/transform.cpython-39.pyc +0 -0
  11. depth_anything/util/transform.py +248 -0
  12. depthanything_server.py +61 -0
  13. gallery.md +160 -0
  14. metric_depth/README.md +89 -0
  15. metric_depth/depth_to_pointcloud.py +79 -0
  16. metric_depth/environment.yml +26 -0
  17. metric_depth/evaluate.py +160 -0
  18. metric_depth/point_cloud_on_trackbar.py +168 -0
  19. metric_depth/train_mix.py +182 -0
  20. metric_depth/train_mono.py +176 -0
  21. metric_depth/train_test_inputs/kitti_eigen_test_files_with_gt.txt +0 -0
  22. metric_depth/train_test_inputs/kitti_eigen_train_files_with_gt.txt +0 -0
  23. metric_depth/train_test_inputs/nyudepthv2_test_files_with_gt.txt +654 -0
  24. metric_depth/train_test_inputs/nyudepthv2_train_files_with_gt.txt +0 -0
  25. metric_depth/zoedepth/data/__init__.py +24 -0
  26. metric_depth/zoedepth/data/data_mono.py +573 -0
  27. metric_depth/zoedepth/data/ddad.py +125 -0
  28. metric_depth/zoedepth/data/diml_indoor_test.py +125 -0
  29. metric_depth/zoedepth/data/diml_outdoor_test.py +114 -0
  30. metric_depth/zoedepth/data/diode.py +125 -0
  31. metric_depth/zoedepth/data/hypersim.py +138 -0
  32. metric_depth/zoedepth/data/ibims.py +81 -0
  33. metric_depth/zoedepth/data/preprocess.py +154 -0
  34. metric_depth/zoedepth/data/sun_rgbd_loader.py +115 -0
  35. metric_depth/zoedepth/data/transforms.py +481 -0
  36. metric_depth/zoedepth/data/vkitti.py +151 -0
  37. metric_depth/zoedepth/data/vkitti2.py +187 -0
  38. metric_depth/zoedepth/models/__init__.py +24 -0
  39. metric_depth/zoedepth/models/base_models/__init__.py +24 -0
  40. metric_depth/zoedepth/models/base_models/depth_anything.py +376 -0
  41. metric_depth/zoedepth/models/base_models/dpt_dinov2/blocks.py +153 -0
  42. metric_depth/zoedepth/models/base_models/dpt_dinov2/dpt.py +157 -0
  43. metric_depth/zoedepth/models/base_models/midas.py +380 -0
  44. metric_depth/zoedepth/models/builder.py +51 -0
  45. metric_depth/zoedepth/models/depth_model.py +152 -0
  46. metric_depth/zoedepth/models/layers/attractor.py +208 -0
  47. metric_depth/zoedepth/models/layers/dist_layers.py +121 -0
  48. metric_depth/zoedepth/models/layers/localbins_layers.py +169 -0
  49. metric_depth/zoedepth/models/layers/patch_transformer.py +91 -0
  50. metric_depth/zoedepth/models/model_io.py +92 -0
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,12 +1,282 @@
1
  ---
2
- title: Sketchpad DepthAnything
3
- emoji: 📉
4
- colorFrom: purple
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 4.40.0
8
- app_file: app.py
9
- pinned: false
10
  ---
 
 
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
  ---
2
+ title: Sketchpad-DepthAnything
3
+ app_file: depthanything_server.py
4
  sdk: gradio
5
+ sdk_version: 4.39.0
6
  ---
7
+ <div align="center">
8
+ <h2>Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data</h2>
9
 
10
+ [**Lihe Yang**](https://liheyoung.github.io/)<sup>1</sup> · [**Bingyi Kang**](https://scholar.google.com/citations?user=NmHgX-wAAAAJ)<sup>2&dagger;</sup> · [**Zilong Huang**](http://speedinghzl.github.io/)<sup>2</sup> · [**Xiaogang Xu**](https://xiaogang00.github.io/)<sup>3,4</sup> · [**Jiashi Feng**](https://sites.google.com/site/jshfeng/)<sup>2</sup> · [**Hengshuang Zhao**](https://hszhao.github.io/)<sup>1*</sup>
11
+
12
+ <sup>1</sup>HKU&emsp;&emsp;&emsp;&emsp;<sup>2</sup>TikTok&emsp;&emsp;&emsp;&emsp;<sup>3</sup>CUHK&emsp;&emsp;&emsp;&emsp;<sup>4</sup>ZJU
13
+
14
+ &dagger;project lead&emsp;*corresponding author
15
+
16
+ **CVPR 2024**
17
+
18
+ <a href="https://arxiv.org/abs/2401.10891"><img src='https://img.shields.io/badge/arXiv-Depth Anything-red' alt='Paper PDF'></a>
19
+ <a href='https://depth-anything.github.io'><img src='https://img.shields.io/badge/Project_Page-Depth Anything-green' alt='Project Page'></a>
20
+ <a href='https://huggingface.co/spaces/LiheYoung/Depth-Anything'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
21
+ <a href='https://huggingface.co/papers/2401.10891'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Paper-yellow'></a>
22
+ </div>
23
+
24
+ This work presents Depth Anything, a highly practical solution for robust monocular depth estimation by training on a combination of 1.5M labeled images and **62M+ unlabeled images**.
25
+
26
+ ![teaser](assets/teaser.png)
27
+
28
+ <div align="center">
29
+ <a href="https://github.com/DepthAnything/Depth-Anything-V2"><b>Try our latest Depth Anything V2 models!</b></a><br>
30
+ </div>
31
+
32
+ ## News
33
+
34
+ * **2024-06-14:** [Depth Anything V2](https://github.com/DepthAnything/Depth-Anything-V2) is released.
35
+ * **2024-02-27:** Depth Anything is accepted by CVPR 2024.
36
+ * **2024-02-05:** [Depth Anything Gallery](./gallery.md) is released. Thanks to all the users!
37
+ * **2024-02-02:** Depth Anything serves as the default depth processor for [InstantID](https://github.com/InstantID/InstantID) and [InvokeAI](https://github.com/invoke-ai/InvokeAI/releases/tag/v3.6.1).
38
+ * **2024-01-25:** Support [video depth visualization](./run_video.py). An [online demo for video](https://huggingface.co/spaces/JohanDL/Depth-Anything-Video) is also available.
39
+ * **2024-01-23:** The new ControlNet based on Depth Anything is integrated into [ControlNet WebUI](https://github.com/Mikubill/sd-webui-controlnet) and [ComfyUI's ControlNet](https://github.com/Fannovel16/comfyui_controlnet_aux).
40
+ * **2024-01-23:** Depth Anything [ONNX](https://github.com/fabio-sim/Depth-Anything-ONNX) and [TensorRT](https://github.com/spacewalk01/depth-anything-tensorrt) versions are supported.
41
+ * **2024-01-22:** Paper, project page, code, models, and demo ([HuggingFace](https://huggingface.co/spaces/LiheYoung/Depth-Anything), [OpenXLab](https://openxlab.org.cn/apps/detail/yyfan/depth_anything)) are released.
42
+
43
+
44
+ ## Features of Depth Anything
45
+
46
+ ***If you need other features, please first check the [existing community support](#community-support).***
47
+
48
+ - **Relative depth estimation**:
49
+
50
+ Our foundation models listed [here](https://huggingface.co/spaces/LiheYoung/Depth-Anything/tree/main/checkpoints) provide robust relative depth estimation for any given image. Please refer to [this section](#running) for details.
51
+
52
+ - **Metric depth estimation**
53
+
54
+ We fine-tune our Depth Anything model with metric depth information from NYUv2 or KITTI, which yields strong in-domain and zero-shot metric depth estimation. Please refer to [this directory](./metric_depth) for details.
55
+
56
+
57
+ - **Better depth-conditioned ControlNet**
58
+
59
+ We re-train **a better depth-conditioned ControlNet** based on Depth Anything. It offers more precise synthesis than the previous MiDaS-based ControlNet. Please refer to [this directory](./controlnet/) for details. You can also use our new ControlNet based on Depth Anything in [ControlNet WebUI](https://github.com/Mikubill/sd-webui-controlnet) or [ComfyUI's ControlNet](https://github.com/Fannovel16/comfyui_controlnet_aux).
60
+
61
+ - **Downstream high-level scene understanding**
62
+
63
+ The Depth Anything encoder can be fine-tuned for downstream high-level perception tasks, *e.g.*, semantic segmentation, achieving 86.2 mIoU on Cityscapes and 59.4 mIoU on ADE20K. Please refer to [this directory](./semseg/) for details.
64
+
65
+
66
+ ## Performance
67
+
68
+ Here we compare our Depth Anything with the previous best MiDaS v3.1 BEiT<sub>L-512</sub> model.
69
+
70
+ Please note that the latest MiDaS is also trained on KITTI and NYUv2, whereas our model is not.
71
+
72
+ | Method | Params | KITTI || NYUv2 || Sintel || DDAD || ETH3D || DIODE ||
73
+ |-|-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
74
+ | | | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ |
75
+ | MiDaS | 345.0M | 0.127 | 0.850 | 0.048 | *0.980* | 0.587 | 0.699 | 0.251 | 0.766 | 0.139 | 0.867 | 0.075 | 0.942 |
76
+ | **Ours-S** | 24.8M | 0.080 | 0.936 | 0.053 | 0.972 | 0.464 | 0.739 | 0.247 | 0.768 | 0.127 | **0.885** | 0.076 | 0.939 |
77
+ | **Ours-B** | 97.5M | *0.080* | *0.939* | *0.046* | 0.979 | **0.432** | *0.756* | *0.232* | *0.786* | **0.126** | *0.884* | *0.069* | *0.946* |
78
+ | **Ours-L** | 335.3M | **0.076** | **0.947** | **0.043** | **0.981** | *0.458* | **0.760** | **0.230** | **0.789** | *0.127* | 0.882 | **0.066** | **0.952** |
79
+
80
+ We highlight the **best** and *second best* results in **bold** and *italic* respectively (**better results**: AbsRel $\downarrow$ , $\delta_1 \uparrow$).
81
+
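For reference, AbsRel and $\delta_1$ are the standard zero-shot evaluation metrics (predictions are aligned to the ground truth in scale and shift before scoring), with $d_i$ the predicted depth, $d_i^{*}$ the ground truth, and $N$ the number of valid pixels:

$$\mathrm{AbsRel} = \frac{1}{N}\sum_{i=1}^{N}\frac{\lvert d_i - d_i^{*}\rvert}{d_i^{*}}, \qquad \delta_1 = \frac{1}{N}\,\bigl|\{\, i : \max(d_i/d_i^{*},\; d_i^{*}/d_i) < 1.25 \,\}\bigr|$$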
82
+ ## Pre-trained models
83
+
84
+ We provide three models of varying scales for robust relative depth estimation:
85
+
86
+ | Model | Params | Inference Time on V100 (ms) | A100 | RTX4090 ([TensorRT](https://github.com/spacewalk01/depth-anything-tensorrt)) |
87
+ |:-|-:|:-:|:-:|:-:|
88
+ | Depth-Anything-Small | 24.8M | 12 | 8 | 3 |
89
+ | Depth-Anything-Base | 97.5M | 13 | 9 | 6 |
90
+ | Depth-Anything-Large | 335.3M | 20 | 13 | 12 |
91
+
92
+ Note that the V100 and A100 inference times (*without TensorRT*) exclude the pre-processing and post-processing stages, whereas the RTX4090 times (*with TensorRT*, last column) include them (please refer to [Depth-Anything-TensorRT](https://github.com/spacewalk01/depth-anything-tensorrt)).
93
+
94
+ You can easily load our pre-trained models as follows:
95
+ ```python
96
+ from depth_anything.dpt import DepthAnything
97
+
98
+ encoder = 'vits' # can also be 'vitb' or 'vitl'
99
+ depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{:}14'.format(encoder))
100
+ ```
101
+
102
+ Depth Anything is also supported in [``transformers``](https://github.com/huggingface/transformers). You can use it for depth prediction within [3 lines of code](https://huggingface.co/docs/transformers/main/model_doc/depth_anything) (credit to [@niels](https://huggingface.co/nielsr)).
103
+
104
+ ### *No network connection, cannot load these models?*
105
+
106
+ <details>
107
+ <summary>Click here for solutions</summary>
108
+
109
+ - First, manually download the three checkpoints: [depth-anything-large](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitl14.pth), [depth-anything-base](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitb14.pth), and [depth-anything-small](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vits14.pth).
110
+
111
+ - Second, upload the folder containing the checkpoints to your remote server.
112
+
113
+ - Lastly, load the model locally:
114
+ ```python
115
+ from depth_anything.dpt import DepthAnything
116
+
117
+ model_configs = {
118
+ 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
119
+ 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
120
+ 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}
121
+ }
122
+
123
+ encoder = 'vitl' # or 'vitb', 'vits'
124
+ depth_anything = DepthAnything(model_configs[encoder])
125
+ depth_anything.load_state_dict(torch.load(f'./checkpoints/depth_anything_{encoder}14.pth'))
126
+ ```
127
+ Note that when loading the model locally this way, you do not need the ``huggingface_hub`` package, so feel free to delete this [line](https://github.com/LiheYoung/Depth-Anything/blob/e7ef4b4b7a0afd8a05ce9564f04c1e5b68268516/depth_anything/dpt.py#L5) and remove ``PyTorchModelHubMixin`` from this [line](https://github.com/LiheYoung/Depth-Anything/blob/e7ef4b4b7a0afd8a05ce9564f04c1e5b68268516/depth_anything/dpt.py#L169).
128
+ </details>
129
+
130
+
131
+ ## Usage
132
+
133
+ ### Installation
134
+
135
+ ```bash
136
+ git clone https://github.com/LiheYoung/Depth-Anything
137
+ cd Depth-Anything
138
+ pip install -r requirements.txt
139
+ ```
140
+
141
+ ### Running
142
+
143
+ ```bash
144
+ python run.py --encoder <vits | vitb | vitl> --img-path <img-directory | single-img | txt-file> --outdir <outdir> [--pred-only] [--grayscale]
145
+ ```
146
+ Arguments:
147
+ - ``--img-path``: you can either 1) point it to an image directory containing all images of interest, 2) point it to a single image, or 3) point it to a text file listing all image paths.
148
+ - ``--pred-only`` is set to save the predicted depth map only. Without it, by default, we visualize both the image and its depth map side by side.
149
+ - ``--grayscale`` is set to save the grayscale depth map. Without it, by default, we apply a color palette to the depth map.
150
+
151
+ For example:
152
+ ```bash
153
+ python run.py --encoder vitl --img-path assets/examples --outdir depth_vis
154
+ ```
155
+
156
+ **If you want to use Depth Anything on videos:**
157
+ ```bash
158
+ python run_video.py --encoder vitl --video-path assets/examples_video --outdir video_depth_vis
159
+ ```
160
+
161
+ ### Gradio demo <a href='https://github.com/gradio-app/gradio'><img src='https://img.shields.io/github/stars/gradio-app/gradio'></a>
162
+
163
+ To run our Gradio demo locally:
164
+
165
+ ```bash
166
+ python app.py
167
+ ```
168
+
169
+ You can also try our [online demo](https://huggingface.co/spaces/LiheYoung/Depth-Anything).
170
+
171
+ ### Import Depth Anything to your project
172
+
173
+ If you want to use Depth Anything in your own project, you can simply follow [``run.py``](run.py) to load our models and define data pre-processing.
174
+
175
+ <details>
176
+ <summary>Code snippet (note the difference between our data pre-processing and that of MiDaS)</summary>
177
+
178
+ ```python
179
+ from depth_anything.dpt import DepthAnything
180
+ from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
181
+
182
+ import cv2
183
+ import torch
184
+ from torchvision.transforms import Compose
185
+
186
+ encoder = 'vits' # can also be 'vitb' or 'vitl'
187
+ depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{:}14'.format(encoder)).eval()
188
+
189
+ transform = Compose([
190
+ Resize(
191
+ width=518,
192
+ height=518,
193
+ resize_target=False,
194
+ keep_aspect_ratio=True,
195
+ ensure_multiple_of=14,
196
+ resize_method='lower_bound',
197
+ image_interpolation_method=cv2.INTER_CUBIC,
198
+ ),
199
+ NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
200
+ PrepareForNet(),
201
+ ])
202
+
203
+ image = cv2.cvtColor(cv2.imread('your image path'), cv2.COLOR_BGR2RGB) / 255.0
204
+ image = transform({'image': image})['image']
205
+ image = torch.from_numpy(image).unsqueeze(0)
206
+
207
+ # depth shape: 1xHxW
208
+ depth = depth_anything(image)
209
+ ```
210
+ </details>
211
+
212
+ ### Do not want to define image pre-processing or download model definition files?
213
+
214
+ Easily use Depth Anything through [``transformers``](https://github.com/huggingface/transformers) within 3 lines of code! Please refer to [these instructions](https://huggingface.co/docs/transformers/main/model_doc/depth_anything) (credit to [@niels](https://huggingface.co/nielsr)).
215
+
216
+ **Note:** If you encounter ``KeyError: 'depth_anything'``, please install the latest [``transformers``](https://github.com/huggingface/transformers) from source:
217
+ ```bash
218
+ pip install git+https://github.com/huggingface/transformers.git
219
+ ```
220
+ <details>
221
+ <summary>Click here for a brief demo:</summary>
222
+
223
+ ```python
224
+ from transformers import pipeline
225
+ from PIL import Image
226
+
227
+ image = Image.open('Your-image-path')
228
+ pipe = pipeline(task="depth-estimation", model="LiheYoung/depth-anything-small-hf")
229
+ depth = pipe(image)["depth"]
230
+ ```
231
+ </details>
232
+
233
+ ## Community Support
234
+
235
+ **We sincerely appreciate all the extensions the community has built on Depth Anything. Thank you very much!**
236
+
237
+ Here we list the extensions we have found:
238
+ - Depth Anything TensorRT:
239
+ - https://github.com/spacewalk01/depth-anything-tensorrt
240
+ - https://github.com/thinvy/DepthAnythingTensorrtDeploy
241
+ - https://github.com/daniel89710/trt-depth-anything
242
+ - Depth Anything ONNX: https://github.com/fabio-sim/Depth-Anything-ONNX
243
+ - Depth Anything in Transformers.js (3D visualization): https://huggingface.co/spaces/Xenova/depth-anything-web
244
+ - Depth Anything for video (online demo): https://huggingface.co/spaces/JohanDL/Depth-Anything-Video
245
+ - Depth Anything in ControlNet WebUI: https://github.com/Mikubill/sd-webui-controlnet
246
+ - Depth Anything in ComfyUI's ControlNet: https://github.com/Fannovel16/comfyui_controlnet_aux
247
+ - Depth Anything in X-AnyLabeling: https://github.com/CVHub520/X-AnyLabeling
248
+ - Depth Anything in OpenXLab: https://openxlab.org.cn/apps/detail/yyfan/depth_anything
249
+ - Depth Anything in OpenVINO: https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/280-depth-anything
250
+ - Depth Anything ROS:
251
+ - https://github.com/scepter914/DepthAnything-ROS
252
+ - https://github.com/polatztrk/depth_anything_ros
253
+ - Depth Anything Android:
254
+ - https://github.com/FeiGeChuanShu/ncnn-android-depth_anything
255
+ - https://github.com/shubham0204/Depth-Anything-Android
256
+ - Depth Anything in TouchDesigner: https://github.com/olegchomp/TDDepthAnything
257
+ - LearnOpenCV research article on Depth Anything: https://learnopencv.com/depth-anything
258
+ - Learn more about the DPT architecture we used: https://github.com/heyoeyo/muggled_dpt
259
+ - Depth Anything in NVIDIA Jetson Orin: https://github.com/ZhuYaoHui1998/jetson-examples/blob/main/reComputer/scripts/depth-anything
260
+
261
+
262
+ If you have an amazing project that supports or improves (*e.g.*, in speed) Depth Anything, please feel free to open an issue. We will add it here.
263
+
264
+
265
+ ## Acknowledgement
266
+
267
+ We would like to express our deepest gratitude to [AK(@_akhaliq)](https://twitter.com/_akhaliq) and the awesome HuggingFace team ([@niels](https://huggingface.co/nielsr), [@hysts](https://huggingface.co/hysts), and [@yuvraj](https://huggingface.co/ysharma)) for helping improve the online demo and build the HF models.
268
+
269
+ We also thank the [MagicEdit](https://magic-edit.github.io/) team for providing some video examples for video depth estimation, and [Tiancheng Shen](https://scholar.google.com/citations?user=iRY1YVoAAAAJ) for evaluating the depth maps with MagicEdit.
270
+
271
+ ## Citation
272
+
273
+ If you find this project useful, please consider citing:
274
+
275
+ ```bibtex
276
+ @inproceedings{depthanything,
277
+ title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data},
278
+ author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
279
+ booktitle={CVPR},
280
+ year={2024}
281
+ }
282
+ ```
app.py ADDED
@@ -0,0 +1,95 @@
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ from PIL import Image
6
+ import torch
7
+ import torch.nn.functional as F
8
+ from torchvision.transforms import Compose
9
+ import tempfile
10
+ from gradio_imageslider import ImageSlider
11
+
12
+ from depth_anything.dpt import DepthAnything
13
+ from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
14
+
15
+ css = """
16
+ #img-display-container {
17
+ max-height: 100vh;
18
+ }
19
+ #img-display-input {
20
+ max-height: 80vh;
21
+ }
22
+ #img-display-output {
23
+ max-height: 80vh;
24
+ }
25
+ """
26
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
27
+ model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()
28
+
29
+ title = "# Depth Anything"
30
+ description = """Official demo for **Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data**.
31
+
32
+ Please refer to our [paper](https://arxiv.org/abs/2401.10891), [project page](https://depth-anything.github.io), or [github](https://github.com/LiheYoung/Depth-Anything) for more details."""
33
+
34
+ transform = Compose([
35
+ Resize(
36
+ width=518,
37
+ height=518,
38
+ resize_target=False,
39
+ keep_aspect_ratio=True,
40
+ ensure_multiple_of=14,
41
+ resize_method='lower_bound',
42
+ image_interpolation_method=cv2.INTER_CUBIC,
43
+ ),
44
+ NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
45
+ PrepareForNet(),
46
+ ])
47
+
48
+ @torch.no_grad()
49
+ def predict_depth(model, image):
50
+ return model(image)
51
+
52
+ with gr.Blocks(css=css) as demo:
53
+ gr.Markdown(title)
54
+ gr.Markdown(description)
55
+ gr.Markdown("### Depth Prediction demo")
56
+ gr.Markdown("You can slide the output to compare the depth prediction with the input image")
57
+
58
+ with gr.Row():
59
+ input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
60
+ depth_image_slider = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0.5)
61
+ raw_file = gr.File(label="16-bit raw depth (can be considered as disparity)")
62
+ submit = gr.Button("Submit")
63
+
64
+ def on_submit(image):
65
+ original_image = image.copy()
66
+
67
+ h, w = image.shape[:2]
68
+
69
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
70
+ image = transform({'image': image})['image']
71
+ image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)
72
+
73
+ depth = predict_depth(model, image)
74
+ depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
75
+
76
+ raw_depth = Image.fromarray(depth.cpu().numpy().astype('uint16'))
77
+ tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
78
+ raw_depth.save(tmp.name)
79
+
80
+ depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
81
+ depth = depth.cpu().numpy().astype(np.uint8)
82
+ colored_depth = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]
83
+
84
+ return [(original_image, colored_depth), tmp.name]
85
+
86
+ submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, raw_file])
87
+
88
+ example_files = os.listdir('assets/examples')
89
+ example_files.sort()
90
+ example_files = [os.path.join('assets/examples', filename) for filename in example_files]
91
+ examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[depth_image_slider, raw_file], fn=on_submit, cache_examples=False)
92
+
93
+
94
+ if __name__ == '__main__':
95
+ demo.queue().launch()
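The demo above also exports the raw model output as a 16-bit PNG (the file labeled "16-bit raw depth"). A minimal sketch for loading that file back into a float array, assuming a downloaded file whose name is a placeholder:

```python
import cv2
import numpy as np

# IMREAD_UNCHANGED preserves the uint16 values written by app.py.
raw = cv2.imread("raw_depth.png", cv2.IMREAD_UNCHANGED)

# The values are the model's relative, disparity-like output: larger means closer.
disparity = raw.astype(np.float32)
print(disparity.shape, disparity.min(), disparity.max())
```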
controlnet/README.md ADDED
@@ -0,0 +1,15 @@
1
+ ## Depth-Conditioned ControlNet based on Depth Anything
2
+
3
+ We use [Diffusers](https://github.com/huggingface/diffusers/tree/main) to re-train a better depth-conditioned ControlNet based on our Depth Anything.
4
+
5
+ Please download our [config file](./config.json) and [pre-trained weights](https://huggingface.co/spaces/LiheYoung/Depth-Anything/tree/main/checkpoints_controlnet), then follow the [instructions](https://github.com/huggingface/diffusers/tree/main/examples/controlnet) in Diffusers for inference.
6
+
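Below is a minimal inference sketch in the spirit of the Diffusers ControlNet example. The checkpoint directory, base Stable Diffusion model, depth-map path, and prompt are placeholders/assumptions, not part of this release; the SD 1.5 base is assumed because the config's `cross_attention_dim` is 768.

```python
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline, UniPCMultistepScheduler
from diffusers.utils import load_image

# Placeholder path: a directory holding config.json and the released ControlNet weights.
controlnet = ControlNetModel.from_pretrained("path/to/depth_anything_controlnet", torch_dtype=torch.float16)

pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# A Depth Anything depth map rendered as an RGB image (placeholder path).
depth_image = load_image("path/to/depth_map.png")

result = pipe("a cozy living room, photorealistic", image=depth_image, num_inference_steps=30).images[0]
result.save("controlnet_depth_result.png")
```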
7
+ ## Depth-to-Image Synthesis
8
+
9
+ ![demo1](../assets/controlnet_demo1.png)
10
+ ![demo2](../assets/controlnet_demo2.png)
11
+
12
+
13
+ ## Video Editing
14
+
15
+ Please refer to our [project page](https://depth-anything.github.io/). We use [MagicEdit](https://github.com/magic-research/magic-edit) to show demos of video editing based on depth information.
controlnet/config.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "_class_name": "ControlNetModel",
3
+ "_diffusers_version": "0.26.0.dev0",
4
+ "act_fn": "silu",
5
+ "addition_embed_type": null,
6
+ "addition_embed_type_num_heads": 64,
7
+ "addition_time_embed_dim": null,
8
+ "attention_head_dim": 8,
9
+ "block_out_channels": [
10
+ 320,
11
+ 640,
12
+ 1280,
13
+ 1280
14
+ ],
15
+ "class_embed_type": null,
16
+ "conditioning_channels": 3,
17
+ "conditioning_embedding_out_channels": [
18
+ 16,
19
+ 32,
20
+ 96,
21
+ 256
22
+ ],
23
+ "controlnet_conditioning_channel_order": "rgb",
24
+ "cross_attention_dim": 768,
25
+ "down_block_types": [
26
+ "CrossAttnDownBlock2D",
27
+ "CrossAttnDownBlock2D",
28
+ "CrossAttnDownBlock2D",
29
+ "DownBlock2D"
30
+ ],
31
+ "downsample_padding": 1,
32
+ "encoder_hid_dim": null,
33
+ "encoder_hid_dim_type": null,
34
+ "flip_sin_to_cos": true,
35
+ "freq_shift": 0,
36
+ "global_pool_conditions": false,
37
+ "in_channels": 4,
38
+ "layers_per_block": 2,
39
+ "mid_block_scale_factor": 1,
40
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
41
+ "norm_eps": 1e-05,
42
+ "norm_num_groups": 32,
43
+ "num_attention_heads": null,
44
+ "num_class_embeds": null,
45
+ "only_cross_attention": false,
46
+ "projection_class_embeddings_input_dim": null,
47
+ "resnet_time_scale_shift": "default",
48
+ "transformer_layers_per_block": 1,
49
+ "upcast_attention": false,
50
+ "use_linear_projection": false
51
+ }
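As a quick sanity check, the architecture described by this config can be instantiated with Diffusers. This is only a sketch with randomly initialized weights, and the relative path to config.json is an assumption; load the released checkpoint for actual inference.

```python
import json
from diffusers import ControlNetModel

# Build the ControlNet architecture from the config above (random weights).
with open("controlnet/config.json") as f:
    config = json.load(f)

model = ControlNetModel.from_config(config)
print(f"{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")
```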
depth_anything/__pycache__/blocks.cpython-39.pyc ADDED
Binary file (3.22 kB).
 
depth_anything/__pycache__/dpt.cpython-39.pyc ADDED
Binary file (5.03 kB).
 
depth_anything/blocks.py ADDED
@@ -0,0 +1,153 @@
1
+ import torch.nn as nn
2
+
3
+
4
+ def _make_scratch(in_shape, out_shape, groups=1, expand=False):
5
+ scratch = nn.Module()
6
+
7
+ out_shape1 = out_shape
8
+ out_shape2 = out_shape
9
+ out_shape3 = out_shape
10
+ if len(in_shape) >= 4:
11
+ out_shape4 = out_shape
12
+
13
+ if expand:
14
+ out_shape1 = out_shape
15
+ out_shape2 = out_shape*2
16
+ out_shape3 = out_shape*4
17
+ if len(in_shape) >= 4:
18
+ out_shape4 = out_shape*8
19
+
20
+ scratch.layer1_rn = nn.Conv2d(
21
+ in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
22
+ )
23
+ scratch.layer2_rn = nn.Conv2d(
24
+ in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
25
+ )
26
+ scratch.layer3_rn = nn.Conv2d(
27
+ in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
28
+ )
29
+ if len(in_shape) >= 4:
30
+ scratch.layer4_rn = nn.Conv2d(
31
+ in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
32
+ )
33
+
34
+ return scratch
35
+
36
+
37
+ class ResidualConvUnit(nn.Module):
38
+ """Residual convolution module.
39
+ """
40
+
41
+ def __init__(self, features, activation, bn):
42
+ """Init.
43
+
44
+ Args:
45
+ features (int): number of features
46
+ """
47
+ super().__init__()
48
+
49
+ self.bn = bn
50
+
51
+ self.groups=1
52
+
53
+ self.conv1 = nn.Conv2d(
54
+ features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
55
+ )
56
+
57
+ self.conv2 = nn.Conv2d(
58
+ features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
59
+ )
60
+
61
+ if self.bn==True:
62
+ self.bn1 = nn.BatchNorm2d(features)
63
+ self.bn2 = nn.BatchNorm2d(features)
64
+
65
+ self.activation = activation
66
+
67
+ self.skip_add = nn.quantized.FloatFunctional()
68
+
69
+ def forward(self, x):
70
+ """Forward pass.
71
+
72
+ Args:
73
+ x (tensor): input
74
+
75
+ Returns:
76
+ tensor: output
77
+ """
78
+
79
+ out = self.activation(x)
80
+ out = self.conv1(out)
81
+ if self.bn==True:
82
+ out = self.bn1(out)
83
+
84
+ out = self.activation(out)
85
+ out = self.conv2(out)
86
+ if self.bn==True:
87
+ out = self.bn2(out)
88
+
89
+ if self.groups > 1:
90
+ out = self.conv_merge(out)
91
+
92
+ return self.skip_add.add(out, x)
93
+
94
+
95
+ class FeatureFusionBlock(nn.Module):
96
+ """Feature fusion block.
97
+ """
98
+
99
+ def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None):
100
+ """Init.
101
+
102
+ Args:
103
+ features (int): number of features
104
+ """
105
+ super(FeatureFusionBlock, self).__init__()
106
+
107
+ self.deconv = deconv
108
+ self.align_corners = align_corners
109
+
110
+ self.groups=1
111
+
112
+ self.expand = expand
113
+ out_features = features
114
+ if self.expand==True:
115
+ out_features = features//2
116
+
117
+ self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
118
+
119
+ self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
120
+ self.resConfUnit2 = ResidualConvUnit(features, activation, bn)
121
+
122
+ self.skip_add = nn.quantized.FloatFunctional()
123
+
124
+ self.size=size
125
+
126
+ def forward(self, *xs, size=None):
127
+ """Forward pass.
128
+
129
+ Returns:
130
+ tensor: output
131
+ """
132
+ output = xs[0]
133
+
134
+ if len(xs) == 2:
135
+ res = self.resConfUnit1(xs[1])
136
+ output = self.skip_add.add(output, res)
137
+
138
+ output = self.resConfUnit2(output)
139
+
140
+ if (size is None) and (self.size is None):
141
+ modifier = {"scale_factor": 2}
142
+ elif size is None:
143
+ modifier = {"size": self.size}
144
+ else:
145
+ modifier = {"size": size}
146
+
147
+ output = nn.functional.interpolate(
148
+ output, **modifier, mode="bilinear", align_corners=self.align_corners
149
+ )
150
+
151
+ output = self.out_conv(output)
152
+
153
+ return output
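A small shape-check sketch for the blocks above, using dummy tensors and ViT-S-like channel counts (both assumptions, not part of the committed file):

```python
import torch
import torch.nn as nn
from depth_anything.blocks import FeatureFusionBlock, _make_scratch

# Project four backbone feature maps to a common width of 64 channels.
scratch = _make_scratch([48, 96, 192, 384], 64)
y = scratch.layer1_rn(torch.randn(1, 48, 37, 37))       # -> (1, 64, 37, 37)

# Fuse a feature map with a skip connection and upsample by 2x.
fusion = FeatureFusionBlock(64, nn.ReLU(False))
out = fusion(torch.randn(1, 64, 37, 37), torch.randn(1, 64, 37, 37))
print(y.shape, out.shape)                                # (1, 64, 37, 37) (1, 64, 74, 74)
```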
depth_anything/dpt.py ADDED
@@ -0,0 +1,187 @@
1
+ import argparse
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
6
+
7
+ from depth_anything.blocks import FeatureFusionBlock, _make_scratch
8
+
9
+
10
+ def _make_fusion_block(features, use_bn, size = None):
11
+ return FeatureFusionBlock(
12
+ features,
13
+ nn.ReLU(False),
14
+ deconv=False,
15
+ bn=use_bn,
16
+ expand=False,
17
+ align_corners=True,
18
+ size=size,
19
+ )
20
+
21
+
22
+ class DPTHead(nn.Module):
23
+ def __init__(self, nclass, in_channels, features=256, use_bn=False, out_channels=[256, 512, 1024, 1024], use_clstoken=False):
24
+ super(DPTHead, self).__init__()
25
+
26
+ self.nclass = nclass
27
+ self.use_clstoken = use_clstoken
28
+
29
+ self.projects = nn.ModuleList([
30
+ nn.Conv2d(
31
+ in_channels=in_channels,
32
+ out_channels=out_channel,
33
+ kernel_size=1,
34
+ stride=1,
35
+ padding=0,
36
+ ) for out_channel in out_channels
37
+ ])
38
+
39
+ self.resize_layers = nn.ModuleList([
40
+ nn.ConvTranspose2d(
41
+ in_channels=out_channels[0],
42
+ out_channels=out_channels[0],
43
+ kernel_size=4,
44
+ stride=4,
45
+ padding=0),
46
+ nn.ConvTranspose2d(
47
+ in_channels=out_channels[1],
48
+ out_channels=out_channels[1],
49
+ kernel_size=2,
50
+ stride=2,
51
+ padding=0),
52
+ nn.Identity(),
53
+ nn.Conv2d(
54
+ in_channels=out_channels[3],
55
+ out_channels=out_channels[3],
56
+ kernel_size=3,
57
+ stride=2,
58
+ padding=1)
59
+ ])
60
+
61
+ if use_clstoken:
62
+ self.readout_projects = nn.ModuleList()
63
+ for _ in range(len(self.projects)):
64
+ self.readout_projects.append(
65
+ nn.Sequential(
66
+ nn.Linear(2 * in_channels, in_channels),
67
+ nn.GELU()))
68
+
69
+ self.scratch = _make_scratch(
70
+ out_channels,
71
+ features,
72
+ groups=1,
73
+ expand=False,
74
+ )
75
+
76
+ self.scratch.stem_transpose = None
77
+
78
+ self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
79
+ self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
80
+ self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
81
+ self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
82
+
83
+ head_features_1 = features
84
+ head_features_2 = 32
85
+
86
+ if nclass > 1:
87
+ self.scratch.output_conv = nn.Sequential(
88
+ nn.Conv2d(head_features_1, head_features_1, kernel_size=3, stride=1, padding=1),
89
+ nn.ReLU(True),
90
+ nn.Conv2d(head_features_1, nclass, kernel_size=1, stride=1, padding=0),
91
+ )
92
+ else:
93
+ self.scratch.output_conv1 = nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1)
94
+
95
+ self.scratch.output_conv2 = nn.Sequential(
96
+ nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
97
+ nn.ReLU(True),
98
+ nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
99
+ nn.ReLU(True),
100
+ nn.Identity(),
101
+ )
102
+
103
+ def forward(self, out_features, patch_h, patch_w):
104
+ out = []
105
+ for i, x in enumerate(out_features):
106
+ if self.use_clstoken:
107
+ x, cls_token = x[0], x[1]
108
+ readout = cls_token.unsqueeze(1).expand_as(x)
109
+ x = self.readout_projects[i](torch.cat((x, readout), -1))
110
+ else:
111
+ x = x[0]
112
+
113
+ x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))
114
+
115
+ x = self.projects[i](x)
116
+ x = self.resize_layers[i](x)
117
+
118
+ out.append(x)
119
+
120
+ layer_1, layer_2, layer_3, layer_4 = out
121
+
122
+ layer_1_rn = self.scratch.layer1_rn(layer_1)
123
+ layer_2_rn = self.scratch.layer2_rn(layer_2)
124
+ layer_3_rn = self.scratch.layer3_rn(layer_3)
125
+ layer_4_rn = self.scratch.layer4_rn(layer_4)
126
+
127
+ path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
128
+ path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
129
+ path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
130
+ path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
131
+
132
+ out = self.scratch.output_conv1(path_1)
133
+ out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
134
+ out = self.scratch.output_conv2(out)
135
+
136
+ return out
137
+
138
+
139
+ class DPT_DINOv2(nn.Module):
140
+ def __init__(self, encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024], use_bn=False, use_clstoken=False, localhub=True):
141
+ super(DPT_DINOv2, self).__init__()
142
+
143
+ assert encoder in ['vits', 'vitb', 'vitl']
144
+
145
+ # in case the Internet connection is not stable, please load the DINOv2 locally
146
+ if localhub:
147
+ self.pretrained = torch.hub.load('torchhub/facebookresearch_dinov2_main', 'dinov2_{:}14'.format(encoder), source='local', pretrained=False)
148
+ else:
149
+ self.pretrained = torch.hub.load('facebookresearch/dinov2', 'dinov2_{:}14'.format(encoder))
150
+
151
+ dim = self.pretrained.blocks[0].attn.qkv.in_features
152
+
153
+ self.depth_head = DPTHead(1, dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken)
154
+
155
+ def forward(self, x):
156
+ h, w = x.shape[-2:]
157
+
158
+ features = self.pretrained.get_intermediate_layers(x, 4, return_class_token=True)
159
+
160
+ patch_h, patch_w = h // 14, w // 14
161
+
162
+ depth = self.depth_head(features, patch_h, patch_w)
163
+ depth = F.interpolate(depth, size=(h, w), mode="bilinear", align_corners=True)
164
+ depth = F.relu(depth)
165
+
166
+ return depth.squeeze(1)
167
+
168
+
169
+ class DepthAnything(DPT_DINOv2, PyTorchModelHubMixin):
170
+ def __init__(self, config):
171
+ super().__init__(**config)
172
+
173
+
174
+ if __name__ == '__main__':
175
+ parser = argparse.ArgumentParser()
176
+ parser.add_argument(
177
+ "--encoder",
178
+ default="vits",
179
+ type=str,
180
+ choices=["vits", "vitb", "vitl"],
181
+ )
182
+ args = parser.parse_args()
183
+
184
+ model = DepthAnything.from_pretrained("LiheYoung/depth_anything_{:}14".format(args.encoder))
185
+
186
+ print(model)
187
+
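A minimal forward-pass sketch for the model defined above. It assumes a network connection (``localhub=False`` fetches the DINOv2 backbone from torch.hub) and uses the ViT-S configuration from the README; the depth head is randomly initialized here, so this only demonstrates shapes, not real predictions.

```python
import torch
from depth_anything.dpt import DPT_DINOv2

# ViT-S configuration; localhub=False pulls the DINOv2 backbone from the online torch hub.
model = DPT_DINOv2(encoder='vits', features=64, out_channels=[48, 96, 192, 384], localhub=False).eval()

# The input's spatial size must be a multiple of 14 (the DINOv2 patch size).
with torch.no_grad():
    depth = model(torch.randn(1, 3, 518, 518))

print(depth.shape)  # torch.Size([1, 518, 518])
```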
depth_anything/util/__pycache__/transform.cpython-39.pyc ADDED
Binary file (6.07 kB).
 
depth_anything/util/transform.py ADDED
@@ -0,0 +1,248 @@
1
+ import random
2
+ from PIL import Image, ImageOps, ImageFilter
3
+ import torch
4
+ from torchvision import transforms
5
+ import torch.nn.functional as F
6
+
7
+ import numpy as np
8
+ import cv2
9
+ import math
10
+
11
+
12
+ def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
13
+ """Resize the sample to ensure the given size. Keeps aspect ratio.
14
+
15
+ Args:
16
+ sample (dict): sample
17
+ size (tuple): image size
18
+
19
+ Returns:
20
+ tuple: new size
21
+ """
22
+ shape = list(sample["disparity"].shape)
23
+
24
+ if shape[0] >= size[0] and shape[1] >= size[1]:
25
+ return sample
26
+
27
+ scale = [0, 0]
28
+ scale[0] = size[0] / shape[0]
29
+ scale[1] = size[1] / shape[1]
30
+
31
+ scale = max(scale)
32
+
33
+ shape[0] = math.ceil(scale * shape[0])
34
+ shape[1] = math.ceil(scale * shape[1])
35
+
36
+ # resize
37
+ sample["image"] = cv2.resize(
38
+ sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
39
+ )
40
+
41
+ sample["disparity"] = cv2.resize(
42
+ sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
43
+ )
44
+ sample["mask"] = cv2.resize(
45
+ sample["mask"].astype(np.float32),
46
+ tuple(shape[::-1]),
47
+ interpolation=cv2.INTER_NEAREST,
48
+ )
49
+ sample["mask"] = sample["mask"].astype(bool)
50
+
51
+ return tuple(shape)
52
+
53
+
54
+ class Resize(object):
55
+ """Resize sample to given size (width, height).
56
+ """
57
+
58
+ def __init__(
59
+ self,
60
+ width,
61
+ height,
62
+ resize_target=True,
63
+ keep_aspect_ratio=False,
64
+ ensure_multiple_of=1,
65
+ resize_method="lower_bound",
66
+ image_interpolation_method=cv2.INTER_AREA,
67
+ ):
68
+ """Init.
69
+
70
+ Args:
71
+ width (int): desired output width
72
+ height (int): desired output height
73
+ resize_target (bool, optional):
74
+ True: Resize the full sample (image, mask, target).
75
+ False: Resize image only.
76
+ Defaults to True.
77
+ keep_aspect_ratio (bool, optional):
78
+ True: Keep the aspect ratio of the input sample.
79
+ Output sample might not have the given width and height, and
80
+ resize behaviour depends on the parameter 'resize_method'.
81
+ Defaults to False.
82
+ ensure_multiple_of (int, optional):
83
+ Output width and height is constrained to be multiple of this parameter.
84
+ Defaults to 1.
85
+ resize_method (str, optional):
86
+ "lower_bound": Output will be at least as large as the given size.
87
+ "upper_bound": Output will be at most as large as the given size. (Output size might be smaller than given size.)
88
+ "minimal": Scale as little as possible. (Output size might be smaller than given size.)
89
+ Defaults to "lower_bound".
90
+ """
91
+ self.__width = width
92
+ self.__height = height
93
+
94
+ self.__resize_target = resize_target
95
+ self.__keep_aspect_ratio = keep_aspect_ratio
96
+ self.__multiple_of = ensure_multiple_of
97
+ self.__resize_method = resize_method
98
+ self.__image_interpolation_method = image_interpolation_method
99
+
100
+ def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
101
+ y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
102
+
103
+ if max_val is not None and y > max_val:
104
+ y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)
105
+
106
+ if y < min_val:
107
+ y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)
108
+
109
+ return y
110
+
111
+ def get_size(self, width, height):
112
+ # determine new height and width
113
+ scale_height = self.__height / height
114
+ scale_width = self.__width / width
115
+
116
+ if self.__keep_aspect_ratio:
117
+ if self.__resize_method == "lower_bound":
118
+ # scale such that output size is lower bound
119
+ if scale_width > scale_height:
120
+ # fit width
121
+ scale_height = scale_width
122
+ else:
123
+ # fit height
124
+ scale_width = scale_height
125
+ elif self.__resize_method == "upper_bound":
126
+ # scale such that output size is upper bound
127
+ if scale_width < scale_height:
128
+ # fit width
129
+ scale_height = scale_width
130
+ else:
131
+ # fit height
132
+ scale_width = scale_height
133
+ elif self.__resize_method == "minimal":
134
+ # scale as little as possible
135
+ if abs(1 - scale_width) < abs(1 - scale_height):
136
+ # fit width
137
+ scale_height = scale_width
138
+ else:
139
+ # fit height
140
+ scale_width = scale_height
141
+ else:
142
+ raise ValueError(
143
+ f"resize_method {self.__resize_method} not implemented"
144
+ )
145
+
146
+ if self.__resize_method == "lower_bound":
147
+ new_height = self.constrain_to_multiple_of(
148
+ scale_height * height, min_val=self.__height
149
+ )
150
+ new_width = self.constrain_to_multiple_of(
151
+ scale_width * width, min_val=self.__width
152
+ )
153
+ elif self.__resize_method == "upper_bound":
154
+ new_height = self.constrain_to_multiple_of(
155
+ scale_height * height, max_val=self.__height
156
+ )
157
+ new_width = self.constrain_to_multiple_of(
158
+ scale_width * width, max_val=self.__width
159
+ )
160
+ elif self.__resize_method == "minimal":
161
+ new_height = self.constrain_to_multiple_of(scale_height * height)
162
+ new_width = self.constrain_to_multiple_of(scale_width * width)
163
+ else:
164
+ raise ValueError(f"resize_method {self.__resize_method} not implemented")
165
+
166
+ return (new_width, new_height)
167
+
168
+ def __call__(self, sample):
169
+ width, height = self.get_size(
170
+ sample["image"].shape[1], sample["image"].shape[0]
171
+ )
172
+
173
+ # resize sample
174
+ sample["image"] = cv2.resize(
175
+ sample["image"],
176
+ (width, height),
177
+ interpolation=self.__image_interpolation_method,
178
+ )
179
+
180
+ if self.__resize_target:
181
+ if "disparity" in sample:
182
+ sample["disparity"] = cv2.resize(
183
+ sample["disparity"],
184
+ (width, height),
185
+ interpolation=cv2.INTER_NEAREST,
186
+ )
187
+
188
+ if "depth" in sample:
189
+ sample["depth"] = cv2.resize(
190
+ sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
191
+ )
192
+
193
+ if "semseg_mask" in sample:
194
+ # sample["semseg_mask"] = cv2.resize(
195
+ # sample["semseg_mask"], (width, height), interpolation=cv2.INTER_NEAREST
196
+ # )
197
+ sample["semseg_mask"] = F.interpolate(torch.from_numpy(sample["semseg_mask"]).float()[None, None, ...], (height, width), mode='nearest').numpy()[0, 0]
198
+
199
+ if "mask" in sample:
200
+ sample["mask"] = cv2.resize(
201
+ sample["mask"].astype(np.float32),
202
+ (width, height),
203
+ interpolation=cv2.INTER_NEAREST,
204
+ )
205
+ # sample["mask"] = sample["mask"].astype(bool)
206
+
207
+ # print(sample['image'].shape, sample['depth'].shape)
208
+ return sample
209
+
210
+
211
+ class NormalizeImage(object):
212
+ """Normalize image by the given mean and std.
213
+ """
214
+
215
+ def __init__(self, mean, std):
216
+ self.__mean = mean
217
+ self.__std = std
218
+
219
+ def __call__(self, sample):
220
+ sample["image"] = (sample["image"] - self.__mean) / self.__std
221
+
222
+ return sample
223
+
224
+
225
+ class PrepareForNet(object):
226
+ """Prepare sample for usage as network input.
227
+ """
228
+
229
+ def __init__(self):
230
+ pass
231
+
232
+ def __call__(self, sample):
233
+ image = np.transpose(sample["image"], (2, 0, 1))
234
+ sample["image"] = np.ascontiguousarray(image).astype(np.float32)
235
+
236
+ if "mask" in sample:
237
+ sample["mask"] = sample["mask"].astype(np.float32)
238
+ sample["mask"] = np.ascontiguousarray(sample["mask"])
239
+
240
+ if "depth" in sample:
241
+ depth = sample["depth"].astype(np.float32)
242
+ sample["depth"] = np.ascontiguousarray(depth)
243
+
244
+ if "semseg_mask" in sample:
245
+ sample["semseg_mask"] = sample["semseg_mask"].astype(np.float32)
246
+ sample["semseg_mask"] = np.ascontiguousarray(sample["semseg_mask"])
247
+
248
+ return sample
depthanything_server.py ADDED
@@ -0,0 +1,61 @@
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ from PIL import Image
6
+ import torch
7
+ import torch.nn.functional as F
8
+ from torchvision.transforms import Compose
9
+
10
+ from depth_anything.dpt import DepthAnything
11
+ from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
12
+
13
+
14
+ transform = Compose([
15
+ Resize(
16
+ width=518,
17
+ height=518,
18
+ resize_target=False,
19
+ keep_aspect_ratio=True,
20
+ ensure_multiple_of=14,
21
+ resize_method='lower_bound',
22
+ image_interpolation_method=cv2.INTER_CUBIC,
23
+ ),
24
+ NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
25
+ PrepareForNet(),
26
+ ])
27
+
28
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
29
+ model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()
30
+
31
+
32
+ def predict_depthmap(image):
33
+ original_image = image.copy()
34
+
35
+ h, w = image.shape[:2]
36
+
37
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
38
+ image = transform({'image': image})['image']
39
+ image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)
40
+
41
+ with torch.no_grad():
42
+ depth = model(image)
43
+ depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
44
+ depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
45
+ depth = depth.cpu().numpy().astype(np.uint8)
46
+ colored_depth = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]
47
+
48
+ # colored_depth = Image.fromarray(cv2.cvtColor(colored_depth, cv2.COLOR_BGR2RGB))
49
+ colored_depth = Image.fromarray(colored_depth)  # convert to a PIL image for the Gradio output
50
+
51
+ return colored_depth
52
+
53
+
54
+ demo = gr.Interface(fn=predict_depthmap, inputs=[gr.Image()],
55
+ outputs=[gr.Image(type="pil")]
56
+ )
57
+
58
+ demo.launch(share=True, server_name="localhost", server_port=8082)
59
+
60
+
61
+
gallery.md ADDED
@@ -0,0 +1,160 @@
1
+ # $Depth$ $Anything$ ${\color{crimson}G\color{coral}a\color{royalblue}l\color{olive}l\color{teal}e\color{navy}r\color{plum}y}$
2
+
3
+
4
+
5
+ Here we exhibit awesome community showcases of Depth Anything. We thank all the users for sharing them on the Internet (mainly from Twitter).
6
+
7
+ We organize these cases into three groups: [**image**](#image), [**video**](#video), and [**3D**](#3d).
8
+
9
+
10
+ ## Image
11
+
12
+ You can click on the titles below to be directed to corresponding source pages.
13
+
14
+ ### [Monument Valley](https://twitter.com/weebney/status/1749541957108441309)
15
+
16
+ <img src="assets/gallery/monument_valley.jpg" width="60%"/>
17
+
18
+ ### [Cyber rabbit monitoring screens](https://twitter.com/hayas1357/status/1749298607260316139)
19
+
20
+ <img src="assets/gallery/cyber_rabbit.jpg" width="60%"/>
21
+
22
+ ### [Astronaut cat](https://twitter.com/nanase_ja/status/1749653152406884392)
23
+
24
+ <img src="assets/gallery/astronaut_cat.jpg" width="60%"/>
25
+
26
+ ### [Animation images](https://twitter.com/PlayShingo/status/1750368475867128200)
27
+
28
+ <img src="assets/gallery/animation_image.jpg" width="90%"/>
29
+
30
+ ### [DALL·E bear](https://twitter.com/letalvoj/status/1749341999646347741)
31
+
32
+ <img src="assets/gallery/dalle_bear.jpg" width="60%"/>
33
+
34
+ ### [Cat](https://twitter.com/sajilobroker/status/1749364184419016846)
35
+
36
+ <img src="assets/gallery/cat.jpg" width="60%"/>
37
+
38
+ ### [Surprised bald man](https://twitter.com/mayfer/status/1749712454408679780)
39
+
40
+ <img src="assets/gallery/surprised_bald_man.jpg" width="60%"/>
41
+
42
+ ### [Minecraft](https://twitter.com/BarlowTwin/status/1749353070008693224)
43
+
44
+ <img src="assets/gallery/minecraft.jpg" width="90%"/>
45
+
46
+ ### [Robotic knight amidst lightning](https://twitter.com/IterIntellectus/status/1749432836158021738)
47
+
48
+ <img src="assets/gallery/robotic_knight.jpg" width="45%"/>
49
+
50
+ ### [Football game](https://twitter.com/AB9Mamun/status/1751202608545456235)
51
+
52
+ <img src="assets/gallery/football_game.jpg" width="60%"/>
53
+
54
+ ### [Classical raft painting](https://twitter.com/acidbjazz/status/1749491155698331774)
55
+
56
+ <img src="assets/gallery/raft_painting.jpg" width="60%"/>
57
+
58
+ ### [Diner scene](https://twitter.com/R0b0tSp1der/status/1749301061964435846)
59
+
60
+ <img src="assets/gallery/diner_scene.jpg" width="60%"/>
61
+
62
+ ### [Elon Musk](https://twitter.com/ai_for_success/status/1749304903418482954)
63
+
64
+ <img src="assets/gallery/elon_musk.jpg" width="60%"/>
65
+
66
+ ### [Painted tunnel](https://twitter.com/NodiMend/status/1750800040304492814)
67
+
68
+ <img src="assets/gallery/painted_tunnel.jpg" width="40%"/>
69
+
70
+ ### [Iron man](https://twitter.com/ai_for_success/status/1749304906664808751)
71
+
72
+ <img src="assets/gallery/iron_man.jpg" width="60%"/>
73
+
74
+ ### [Skull](https://twitter.com/ai_for_success/status/1749304909730906381)
75
+
76
+ <img src="assets/gallery/skull.jpg" width="60%"/>
77
+
78
+ ### [Chibi cat-eared character](https://twitter.com/nanase_ja/status/1749484958522204605)
79
+
80
+ <img src="assets/gallery/chibi_cateared_character.jpg" width="60%"/>
81
+
82
+ ### [Exuberant gamer celebration](https://twitter.com/hmaon/status/1749372352016625748)
83
+
84
+ <img src="assets/gallery/gamer_celebration.jpg" width="60%"/>
85
+
86
+ ### [Ocean](https://twitter.com/jarrahorphin/status/1749878678111309870)
87
+
88
+ <img src="assets/gallery/ocean.jpg" width="60%"/>
89
+
90
+ ### [Aerial images](https://twitter.com/lTlanual/status/1749641678124892384)
91
+
92
+ <img src="assets/gallery/aerial_image.jpg" width="60%"/>
93
+
94
+ ### [Grilled chicken skewers](https://twitter.com/promptlord/status/1752323556409856157)
95
+
96
+ <img src="assets/gallery/grilled_chicken_skewers.jpg" width="60%"/>
97
+
98
+ ### [Artistic images](https://twitter.com/ZainHasan6/status/1753553755998416933)
99
+
100
+ <img src="assets/gallery/artistic_image.jpg" width="90%"/>
101
+
102
+ ### [Iconic distracted man](https://twitter.com/ZainHasan6/status/1749308193237303620)
103
+
104
+ <img src="assets/gallery/distracted_man.jpg" width="60%"/>
105
+
106
+ ### [Eye-stalked](https://twitter.com/RJdoesVR/status/1749494967800590780)
107
+
108
+ <img src="assets/gallery/eye-stalked.jpg" width="60%"/>
109
+
110
+ ### [Tearful green frog](https://twitter.com/qsdnl/status/1749298425064313080)
111
+
112
+ <img src="assets/gallery/tearful_green_frog.jpg" width="60%"/>
113
+
114
+
115
+ ## Video
116
+
117
+ For more online showcases, please refer to https://twitter.com/WilliamLamkin/status/1755623301907460582.
118
+
119
+ The videos below may be slow to load. Please wait a moment.
120
+
121
+ ### [Racing game](https://twitter.com/i/status/1750683014152040853)
122
+
123
+ <img src="assets/gallery/racing_car.gif" width="80%"/>
124
+
125
+ ### [Building](https://twitter.com/WayneINR/status/1750945037863551247)
126
+
127
+ <img src="assets/gallery/building.gif" width="80%"/>
128
+
129
+ ### [nuScenes](https://github.com/scepter914/DepthAnything-ROS)
130
+
131
+ <img src="assets/gallery/nuscenes.gif" width="80%"/>
132
+
133
+ ### [Indoor moving](https://twitter.com/PINTO03091/status/1750162506453041437)
134
+
135
+ <img src="assets/gallery/indoor_moving.gif" width="40%"/>
136
+
137
+
138
+ ## 3D
139
+
140
+ The videos below may be slow to load. Please wait a moment.
141
+
142
+ ### [3D visualization](https://twitter.com/victormustar/status/1753008143469093212)
143
+
144
+ <img src="assets/gallery/3d_vis1.gif" width="50%"/><br><br>
145
+ <img src="assets/gallery/3d_vis2.gif" width="50%"/>
146
+
147
+ ### [2D videos to 3D videos](https://twitter.com/stspanho/status/1751709292913143895)
148
+
149
+ <img src="assets/gallery/3d_video.gif" width="60%"/>
150
+
151
+ ### Reconstruction
152
+
153
+ - [case1](https://twitter.com/Artoid_XYZ/status/1751542601772421378)
154
+
155
+ <img src="assets/gallery/reconstruction2.jpeg" width="60%"/>
156
+
157
+ - [case2](https://twitter.com/DennisLoevlie/status/1753846358463709489)
158
+
159
+ <img src="assets/gallery/reconstruction.jpg" width="60%"/>
160
+
metric_depth/README.md ADDED
@@ -0,0 +1,89 @@
1
+ # Depth Anything for Metric Depth Estimation
2
+
3
+ Our Depth Anything models primarily focus on robust *relative* depth estimation. To achieve *metric* depth estimation, we follow ZoeDepth and fine-tune our Depth Anything pre-trained encoder with metric depth annotations from NYUv2 or KITTI.
4
+
5
+
6
+ ## Performance
7
+
8
+ ### *In-domain* metric depth estimation
9
+
10
+ #### NYUv2
11
+
12
+ | Method | $\delta_1 \uparrow$ | $\delta_2 \uparrow$ | $\delta_3 \uparrow$ | AbsRel $\downarrow$ | RMSE $\downarrow$ | log10 $\downarrow$ |
13
+ |:-:|:-:|:-:|:-:|:-:|:-:|:-:|
14
+ | ZoeDepth | 0.951 | 0.994 | 0.999 | 0.077 | 0.282 | 0.033 |
15
+ | Depth Anything | **0.984** | **0.998** | **1.000** | **0.056** | **0.206** | **0.024** |
16
+
17
+
18
+ #### KITTI
19
+
20
+ | Method | $\delta_1 \uparrow$ | $\delta_2 \uparrow$ | $\delta_3 \uparrow$ | AbsRel $\downarrow$ | RMSE $\downarrow$ | log10 $\downarrow$ |
21
+ |:-:|:-:|:-:|:-:|:-:|:-:|:-:|
22
+ | ZoeDepth | 0.971 | 0.996 | 0.999 | 0.054 | 2.281 | 0.082 |
23
+ | Depth Anything | **0.982** | **0.998** | **1.000** | **0.046** | **1.896** | **0.069** |
24
+
25
+
26
+ ### *Zero-shot* metric depth estimation
27
+
28
+ Indoor: NYUv2 $\rightarrow$ SUN RGB-D, iBims-1, and HyperSim<br>
29
+ Outdoor: KITTI $\rightarrow$ Virtual KITTI 2 and DIODE Outdoor
30
+
31
+
32
+ | Method | SUN || iBims || HyperSim || vKITTI || DIODE Outdoor ||
33
+ |-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
34
+ | | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ |
35
+ | ZoeDepth | 0.520 | 0.545 | 0.169 | 0.656 | 0.407 | 0.302 | 0.106 | 0.844 | 0.814 | 0.237 |
36
+ | Depth Anything | **0.500** | **0.660** | **0.150** | **0.714** | **0.363** | **0.361** | **0.085** | **0.913** | **0.794** | **0.288** |
37
+
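+ For reference, AbsRel $= \frac{1}{N}\sum_i |\hat{d}_i - d_i| / d_i$ and $\delta_j$ is the fraction of pixels with $\max(\hat{d}_i/d_i,\, d_i/\hat{d}_i) < 1.25^j$ (the standard metric-depth conventions); RMSE is the root-mean-square depth error, and log10 measures the error in $\log_{10}$ depth space.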
38
+
39
+
40
+
41
+ ## Pre-trained metric depth estimation models
42
+
43
+ We provide [two pre-trained models](https://huggingface.co/spaces/LiheYoung/Depth-Anything/tree/main/checkpoints_metric_depth), one for *indoor* metric depth estimation trained on NYUv2, and the other for *outdoor* metric depth estimation trained on KITTI.
44
+
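+ If you prefer fetching them programmatically, here is a minimal sketch using `huggingface_hub` (the in-repo path `checkpoints_metric_depth/depth_anything_metric_depth_indoor.pt` is an assumption based on the link above; adjust it if the layout differs):
+
+ ```python
+ import os
+ import shutil
+
+ from huggingface_hub import hf_hub_download
+
+ # Download the indoor checkpoint from the Hugging Face Space and copy it
+ # under ./checkpoints, where the commands below expect to find it.
+ cached = hf_hub_download(
+     repo_id="LiheYoung/Depth-Anything",
+     repo_type="space",
+     filename="checkpoints_metric_depth/depth_anything_metric_depth_indoor.pt",
+ )
+ os.makedirs("checkpoints", exist_ok=True)
+ shutil.copy(cached, "checkpoints/depth_anything_metric_depth_indoor.pt")
+ ```
+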
45
+ ## Installation
46
+
47
+ ```bash
48
+ conda env create -n depth_anything_metric --file environment.yml
49
+ conda activate depth_anything_metric
50
+ ```
51
+
52
+ Please follow [ZoeDepth](https://github.com/isl-org/ZoeDepth) to prepare the training and test datasets.
53
+
54
+ ## Evaluation
55
+
56
+ Make sure you have downloaded our pre-trained metric-depth models [here](https://huggingface.co/spaces/LiheYoung/Depth-Anything/tree/main/checkpoints_metric_depth) (for evaluation) and pre-trained relative-depth model [here](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitl14.pth) (for initializing the encoder) and put them under the ``checkpoints`` directory.
57
+
58
+ Indoor:
59
+ ```bash
60
+ python evaluate.py -m zoedepth --pretrained_resource="local::./checkpoints/depth_anything_metric_depth_indoor.pt" -d <nyu | sunrgbd | ibims | hypersim_test>
61
+ ```
62
+
63
+ Outdoor:
64
+ ```bash
65
+ python evaluate.py -m zoedepth --pretrained_resource="local::./checkpoints/depth_anything_metric_depth_outdoor.pt" -d <kitti | vkitti2 | diode_outdoor>
66
+ ```
67
+
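+ Judging from `evaluate.py`, the `-d` flag also appears to accept a comma-separated list of datasets (e.g. `-d nyu,sunrgbd`) as well as the keywords `ALL_INDOOR`, `ALL_OUTDOOR`, and `ALL`, which evaluate on the corresponding groups of benchmarks in one call.
+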
68
+ ## Training
69
+
70
+ Please first download our Depth Anything pre-trained model [here](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitl14.pth), and put it under the ``checkpoints`` directory.
71
+
72
+ ```bash
73
+ python train_mono.py -m zoedepth -d <nyu | kitti> --pretrained_resource=""
74
+ ```
75
+
76
+ This will automatically use our Depth Anything pre-trained ViT-L encoder.
77
+
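+ For quick single-image inference in Python, here is a minimal sketch distilled from `depth_to_pointcloud.py` (the image path is a placeholder; swap in the outdoor checkpoint and `dataset='kitti'` for outdoor scenes):
+
+ ```python
+ import torch
+ import torchvision.transforms as transforms
+ from PIL import Image
+
+ from zoedepth.models.builder import build_model
+ from zoedepth.utils.config import get_config
+
+ # Build the indoor metric model from its evaluation config and a local checkpoint.
+ config = get_config('zoedepth', 'eval', 'nyu')
+ config.pretrained_resource = 'local::./checkpoints/depth_anything_metric_depth_indoor.pt'
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ model = build_model(config).to(device).eval()
+
+ # Run the model on one RGB image; the prediction is metric depth (in meters).
+ image = Image.open('example.jpg').convert('RGB')  # placeholder path
+ x = transforms.ToTensor()(image).unsqueeze(0).to(device)
+ with torch.no_grad():
+     pred = model(x, dataset='nyu')
+ depth = pred['metric_depth'] if isinstance(pred, dict) else pred  # use depth.squeeze() for an H x W map
+ ```
+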
78
+ ## Citation
79
+
80
+ If you find this project useful, please consider citing:
81
+
82
+ ```bibtex
83
+ @article{depthanything,
84
+ title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data},
85
+ author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
86
+ journal={arXiv:2401.10891},
87
+ year={2024},
88
+ }
89
+ ```
metric_depth/depth_to_pointcloud.py ADDED
@@ -0,0 +1,79 @@
1
+ # Born out of Issue 36.
2
+ # Allows the user to set up their own test files to run inference on. (Create a folder my_test with subfolders input and output in the metric_depth directory before running this script.)
3
+ # Make sure you have the necessary libraries installed.
4
+ # Code by @1ssb
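+ # Example run (matching the argparse defaults below):
+ #   python depth_to_pointcloud.py -m zoedepth -p "local::./checkpoints/depth_anything_metric_depth_indoor.pt"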
5
+
6
+ import argparse
7
+ import os
8
+ import glob
9
+ import torch
10
+ import numpy as np
11
+ from PIL import Image
12
+ import torchvision.transforms as transforms
13
+ import open3d as o3d
14
+ from tqdm import tqdm
15
+ from zoedepth.models.builder import build_model
16
+ from zoedepth.utils.config import get_config
17
+
18
+ # Global settings
19
+ FL = 715.0873
20
+ FY = 256 * 0.6
21
+ FX = 256 * 0.6
22
+ NYU_DATA = False
23
+ FINAL_HEIGHT = 256
24
+ FINAL_WIDTH = 256
25
+ INPUT_DIR = './my_test/input'
26
+ OUTPUT_DIR = './my_test/output'
27
+ DATASET = 'nyu' # Let's not pick a fight with the model's dataloader
28
+
29
+ def process_images(model):
30
+ if not os.path.exists(OUTPUT_DIR):
31
+ os.makedirs(OUTPUT_DIR)
32
+
33
+ image_paths = glob.glob(os.path.join(INPUT_DIR, '*.png')) + glob.glob(os.path.join(INPUT_DIR, '*.jpg'))
34
+ for image_path in tqdm(image_paths, desc="Processing Images"):
35
+ try:
36
+ color_image = Image.open(image_path).convert('RGB')
37
+ original_width, original_height = color_image.size
38
+ image_tensor = transforms.ToTensor()(color_image).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
39
+
40
+ pred = model(image_tensor, dataset=DATASET)
41
+ if isinstance(pred, dict):
42
+ pred = pred.get('metric_depth', pred.get('out'))
43
+ elif isinstance(pred, (list, tuple)):
44
+ pred = pred[-1]
45
+ pred = pred.squeeze().detach().cpu().numpy()
46
+
47
+ # Resize color image and depth to final size
48
+ resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS)
49
+ resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST)
50
+
51
+ focal_length_x, focal_length_y = (FX, FY) if not NYU_DATA else (FL, FL)
52
+ x, y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT))
53
+ x = (x - FINAL_WIDTH / 2) / focal_length_x
54
+ y = (y - FINAL_HEIGHT / 2) / focal_length_y
55
+ z = np.array(resized_pred)
56
+ points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
57
+ colors = np.array(resized_color_image).reshape(-1, 3) / 255.0
58
+
59
+ pcd = o3d.geometry.PointCloud()
60
+ pcd.points = o3d.utility.Vector3dVector(points)
61
+ pcd.colors = o3d.utility.Vector3dVector(colors)
62
+ o3d.io.write_point_cloud(os.path.join(OUTPUT_DIR, os.path.splitext(os.path.basename(image_path))[0] + ".ply"), pcd)
63
+ except Exception as e:
64
+ print(f"Error processing {image_path}: {e}")
65
+
66
+ def main(model_name, pretrained_resource):
67
+ config = get_config(model_name, "eval", DATASET)
68
+ config.pretrained_resource = pretrained_resource
69
+ model = build_model(config).to('cuda' if torch.cuda.is_available() else 'cpu')
70
+ model.eval()
71
+ process_images(model)
72
+
73
+ if __name__ == '__main__':
74
+ parser = argparse.ArgumentParser()
75
+ parser.add_argument("-m", "--model", type=str, default='zoedepth', help="Name of the model to test")
76
+ parser.add_argument("-p", "--pretrained_resource", type=str, default='local::./checkpoints/depth_anything_metric_depth_indoor.pt', help="Pretrained resource to use for fetching weights.")
77
+
78
+ args = parser.parse_args()
79
+ main(args.model, args.pretrained_resource)
metric_depth/environment.yml ADDED
@@ -0,0 +1,26 @@
1
+ name: zoe
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - cuda=11.7.1
8
+ - h5py=3.7.0
9
+ - hdf5=1.12.2
10
+ - matplotlib=3.6.2
11
+ - matplotlib-base=3.6.2
12
+ - numpy=1.24.1
13
+ - opencv=4.6.0
14
+ - pip=22.3.1
15
+ - python=3.9.7
16
+ - pytorch=1.13.1
17
+ - pytorch-cuda=11.7
18
+ - pytorch-mutex=1.0
19
+ - scipy=1.10.0
20
+ - torchaudio=0.13.1
21
+ - torchvision=0.14.1
22
+ - pip:
23
+ - huggingface-hub==0.11.1
24
+ - timm==0.6.12
25
+ - tqdm==4.64.1
26
+ - wandb==0.13.9
metric_depth/evaluate.py ADDED
@@ -0,0 +1,160 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import argparse
26
+ from pprint import pprint
27
+
28
+ import torch
29
+ from zoedepth.utils.easydict import EasyDict as edict
30
+ from tqdm import tqdm
31
+
32
+ from zoedepth.data.data_mono import DepthDataLoader
33
+ from zoedepth.models.builder import build_model
34
+ from zoedepth.utils.arg_utils import parse_unknown
35
+ from zoedepth.utils.config import change_dataset, get_config, ALL_EVAL_DATASETS, ALL_INDOOR, ALL_OUTDOOR
36
+ from zoedepth.utils.misc import (RunningAverageDict, colors, compute_metrics,
37
+ count_parameters)
38
+
39
+
40
+ @torch.no_grad()
41
+ def infer(model, images, **kwargs):
42
+ """Inference with flip augmentation"""
43
+ # images.shape = N, C, H, W
44
+ def get_depth_from_prediction(pred):
45
+ if isinstance(pred, torch.Tensor):
46
+ pred = pred # pass
47
+ elif isinstance(pred, (list, tuple)):
48
+ pred = pred[-1]
49
+ elif isinstance(pred, dict):
50
+ pred = pred['metric_depth'] if 'metric_depth' in pred else pred['out']
51
+ else:
52
+ raise NotImplementedError(f"Unknown output type {type(pred)}")
53
+ return pred
54
+
55
+ pred1 = model(images, **kwargs)
56
+ pred1 = get_depth_from_prediction(pred1)
57
+
58
+ pred2 = model(torch.flip(images, [3]), **kwargs)
59
+ pred2 = get_depth_from_prediction(pred2)
60
+ pred2 = torch.flip(pred2, [3])
61
+
62
+ mean_pred = 0.5 * (pred1 + pred2)
63
+
64
+ return mean_pred
65
+
66
+
67
+ @torch.no_grad()
68
+ def evaluate(model, test_loader, config, round_vals=True, round_precision=3):
69
+ model.eval()
70
+ metrics = RunningAverageDict()
71
+ for i, sample in tqdm(enumerate(test_loader), total=len(test_loader)):
72
+ if 'has_valid_depth' in sample:
73
+ if not sample['has_valid_depth']:
74
+ continue
75
+ image, depth = sample['image'], sample['depth']
76
+ image, depth = image.cuda(), depth.cuda()
77
+ depth = depth.squeeze().unsqueeze(0).unsqueeze(0)
78
+ focal = sample.get('focal', torch.Tensor(
79
+ [715.0873]).cuda()) # This magic number (focal) is only used for evaluating BTS model
80
+ pred = infer(model, image, dataset=sample['dataset'][0], focal=focal)
81
+
82
+ # Save image, depth, pred for visualization
83
+ if "save_images" in config and config.save_images:
84
+ import os
85
+ # print("Saving images ...")
86
+ from PIL import Image
87
+ import torchvision.transforms as transforms
88
+ from zoedepth.utils.misc import colorize
89
+
90
+ os.makedirs(config.save_images, exist_ok=True)
91
+ # def save_image(img, path):
92
+ d = colorize(depth.squeeze().cpu().numpy(), 0, 10)
93
+ p = colorize(pred.squeeze().cpu().numpy(), 0, 10)
94
+ im = transforms.ToPILImage()(image.squeeze().cpu())
95
+ im.save(os.path.join(config.save_images, f"{i}_img.png"))
96
+ Image.fromarray(d).save(os.path.join(config.save_images, f"{i}_depth.png"))
97
+ Image.fromarray(p).save(os.path.join(config.save_images, f"{i}_pred.png"))
98
+
99
+
100
+
101
+ # print(depth.shape, pred.shape)
102
+ metrics.update(compute_metrics(depth, pred, config=config))
103
+
104
+ if round_vals:
105
+ def r(m): return round(m, round_precision)
106
+ else:
107
+ def r(m): return m
108
+ metrics = {k: r(v) for k, v in metrics.get_value().items()}
109
+ return metrics
110
+
111
+ def main(config):
112
+ model = build_model(config)
113
+ test_loader = DepthDataLoader(config, 'online_eval').data
114
+ model = model.cuda()
115
+ metrics = evaluate(model, test_loader, config)
116
+ print(f"{colors.fg.green}")
117
+ print(metrics)
118
+ print(f"{colors.reset}")
119
+ metrics['#params'] = f"{round(count_parameters(model, include_all=True)/1e6, 2)}M"
120
+ return metrics
121
+
122
+
123
+ def eval_model(model_name, pretrained_resource, dataset='nyu', **kwargs):
124
+
125
+ # Load default pretrained resource defined in config if not set
126
+ overwrite = {**kwargs, "pretrained_resource": pretrained_resource} if pretrained_resource else kwargs
127
+ config = get_config(model_name, "eval", dataset, **overwrite)
128
+ # config = change_dataset(config, dataset) # change the dataset
129
+ pprint(config)
130
+ print(f"Evaluating {model_name} on {dataset}...")
131
+ metrics = main(config)
132
+ return metrics
133
+
134
+
135
+ if __name__ == '__main__':
136
+ parser = argparse.ArgumentParser()
137
+ parser.add_argument("-m", "--model", type=str,
138
+ required=True, help="Name of the model to evaluate")
139
+ parser.add_argument("-p", "--pretrained_resource", type=str,
140
+ required=False, default="", help="Pretrained resource to use for fetching weights. If not set, the default resource from the model config is used. Refer to models.model_io.load_state_from_resource for more details.")
141
+ parser.add_argument("-d", "--dataset", type=str, required=False,
142
+ default='nyu', help="Dataset to evaluate on")
143
+
144
+ args, unknown_args = parser.parse_known_args()
145
+ overwrite_kwargs = parse_unknown(unknown_args)
146
+
147
+ if "ALL_INDOOR" in args.dataset:
148
+ datasets = ALL_INDOOR
149
+ elif "ALL_OUTDOOR" in args.dataset:
150
+ datasets = ALL_OUTDOOR
151
+ elif "ALL" in args.dataset:
152
+ datasets = ALL_EVAL_DATASETS
153
+ elif "," in args.dataset:
154
+ datasets = args.dataset.split(",")
155
+ else:
156
+ datasets = [args.dataset]
157
+
158
+ for dataset in datasets:
159
+ eval_model(args.model, pretrained_resource=args.pretrained_resource,
160
+ dataset=dataset, **overwrite_kwargs)
metric_depth/point_cloud_on_trackbar.py ADDED
@@ -0,0 +1,168 @@
1
+ """
2
+ Born out of Depth Anything V2
3
+ Make sure you have the necessary libraries installed.
4
+ Code by @1ssb
5
+
6
+ This script processes a video to generate depth maps and corresponding point clouds for each frame.
7
+ The resulting depth maps are saved in a video format, and the point clouds can be interactively generated for selected frames.
8
+
9
+ Usage:
10
+ python script.py --video-path path_to_video --input-size 518 --outdir output_directory --encoder vitl --focal-length-x 470.4 --focal-length-y 470.4 --pred-only --grayscale
11
+
12
+ Arguments:
13
+ --video-path: Path to the input video.
14
+ --input-size: Size to which the input frame is resized for depth prediction.
15
+ --outdir: Directory to save the output video and point clouds.
16
+ --encoder: Model encoder to use. Choices are ['vits', 'vitb', 'vitl', 'vitg'].
17
+ --focal-length-x: Focal length along the x-axis.
18
+ --focal-length-y: Focal length along the y-axis.
19
+ --pred-only: Only display the prediction without the original frame.
20
+ --grayscale: Do not apply colorful palette to the depth map.
21
+ """
22
+
23
+ import argparse
24
+ import cv2
25
+ import glob
26
+ import matplotlib
27
+ import numpy as np
28
+ import os
29
+ import torch
30
+ import open3d as o3d
31
+
32
+ from depth_anything_v2.dpt import DepthAnythingV2
33
+
34
+
35
+ def main():
36
+ # Parse command-line arguments
37
+ parser = argparse.ArgumentParser(description='Depth Anything V2 with Point Cloud Generation')
38
+ parser.add_argument('--video-path', type=str, required=True, help='Path to the input video.')
39
+ parser.add_argument('--input-size', type=int, default=518, help='Size to which the input frame is resized for depth prediction.')
40
+ parser.add_argument('--outdir', type=str, default='./vis_video_depth', help='Directory to save the output video and point clouds.')
41
+ parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg'], help='Model encoder to use.')
42
+ parser.add_argument('--focal-length-x', default=470.4, type=float, help='Focal length along the x-axis.')
43
+ parser.add_argument('--focal-length-y', default=470.4, type=float, help='Focal length along the y-axis.')
44
+ parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='Only display the prediction.')
45
+ parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='Do not apply colorful palette.')
46
+
47
+ args = parser.parse_args()
48
+
49
+ # Determine the device to use (CUDA, MPS, or CPU)
50
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
51
+
52
+ # Model configuration based on the chosen encoder
53
+ model_configs = {
54
+ 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
55
+ 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
56
+ 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
57
+ 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
58
+ }
59
+
60
+ # Initialize the DepthAnythingV2 model with the specified configuration
61
+ depth_anything = DepthAnythingV2(**model_configs[args.encoder])
62
+ depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
63
+ depth_anything = depth_anything.to(DEVICE).eval()
64
+
65
+ # Get the list of video files to process
66
+ if os.path.isfile(args.video_path):
67
+ if args.video_path.endswith('txt'):
68
+ with open(args.video_path, 'r') as f:
69
+ filenames = f.read().splitlines()  # one video path per line
70
+ else:
71
+ filenames = [args.video_path]
72
+ else:
73
+ filenames = glob.glob(os.path.join(args.video_path, '**/*'), recursive=True)
74
+
75
+ # Create the output directory if it doesn't exist
76
+ os.makedirs(args.outdir, exist_ok=True)
77
+
78
+ margin_width = 50
79
+ cmap = matplotlib.colormaps.get_cmap('Spectral_r')
80
+
81
+ for k, filename in enumerate(filenames):
82
+ print(f'Processing {k+1}/{len(filenames)}: {filename}')
83
+
84
+ raw_video = cv2.VideoCapture(filename)
85
+ frame_width, frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
86
+ frame_rate = int(raw_video.get(cv2.CAP_PROP_FPS))
87
+
88
+ if args.pred_only:
89
+ output_width = frame_width
90
+ else:
91
+ output_width = frame_width * 2 + margin_width
92
+
93
+ output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.mp4')
94
+ out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (output_width, frame_height))
95
+
96
+ frame_index = 0
97
+ frame_data = []
98
+
99
+ while raw_video.isOpened():
100
+ ret, raw_frame = raw_video.read()
101
+ if not ret:
102
+ break
103
+
104
+ depth = depth_anything.infer_image(raw_frame, args.input_size)
105
+
106
+ depth_normalized = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
107
+ depth_normalized = depth_normalized.astype(np.uint8)
108
+
109
+ if args.grayscale:
110
+ depth_colored = np.repeat(depth_normalized[..., np.newaxis], 3, axis=-1)
111
+ else:
112
+ depth_colored = (cmap(depth_normalized)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8)
113
+
114
+ if args.pred_only:
115
+ out.write(depth_colored)
116
+ else:
117
+ split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255
118
+ combined_frame = cv2.hconcat([raw_frame, split_region, depth_colored])
119
+ out.write(combined_frame)
120
+
121
+ frame_data.append((raw_frame, depth, depth_colored))
122
+ frame_index += 1
123
+
124
+ raw_video.release()
125
+ out.release()
126
+
127
+ # Function to create point cloud from depth map
128
+ def create_point_cloud(raw_frame, depth_map, frame_index):
129
+ height, width = raw_frame.shape[:2]
130
+ focal_length_x = args.focal_length_x
131
+ focal_length_y = args.focal_length_y
132
+
133
+ x, y = np.meshgrid(np.arange(width), np.arange(height))
134
+ x = (x - width / 2) / focal_length_x
135
+ y = (y - height / 2) / focal_length_y
136
+ z = np.array(depth_map)
137
+
138
+ points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
139
+ colors = raw_frame.reshape(-1, 3) / 255.0
140
+
141
+ pcd = o3d.geometry.PointCloud()
142
+ pcd.points = o3d.utility.Vector3dVector(points)
143
+ pcd.colors = o3d.utility.Vector3dVector(colors)
144
+
145
+ pcd_path = os.path.join(args.outdir, f'frame_{frame_index}_point_cloud.ply')
146
+ o3d.io.write_point_cloud(pcd_path, pcd)
147
+ print(f'Point cloud saved to {pcd_path}')
148
+
149
+ # Interactive window to select a frame and generate its point cloud
150
+ def on_trackbar(val):
151
+ frame_index = val
152
+ raw_frame, depth_map, _ = frame_data[frame_index]
153
+ create_point_cloud(raw_frame, depth_map, frame_index)
154
+
155
+ if frame_data:
156
+ cv2.namedWindow('Select Frame for Point Cloud')
157
+ cv2.createTrackbar('Frame', 'Select Frame for Point Cloud', 0, frame_index - 1, on_trackbar)
158
+
159
+ while True:
160
+ key = cv2.waitKey(1) & 0xFF
161
+ if key == 27: # Esc key to exit
162
+ break
163
+
164
+ cv2.destroyAllWindows()
165
+
166
+
167
+ if __name__ == '__main__':
168
+ main()
metric_depth/train_mix.py ADDED
@@ -0,0 +1,182 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from zoedepth.utils.misc import count_parameters, parallelize
26
+ from zoedepth.utils.config import get_config
27
+ from zoedepth.utils.arg_utils import parse_unknown
28
+ from zoedepth.trainers.builder import get_trainer
29
+ from zoedepth.models.builder import build_model
30
+ from zoedepth.data.data_mono import MixedNYUKITTI
31
+ import torch.utils.data.distributed
32
+ import torch.multiprocessing as mp
33
+ import torch
34
+ import numpy as np
35
+ from pprint import pprint
36
+ import argparse
37
+ import os
38
+
39
+ os.environ["PYOPENGL_PLATFORM"] = "egl"
40
+ os.environ["WANDB_START_METHOD"] = "thread"
41
+
42
+
43
+ def fix_random_seed(seed: int):
44
+ """
45
+ Fix random seed for reproducibility
46
+
47
+ Args:
48
+ seed (int): random seed
49
+ """
50
+ import random
51
+
52
+ import numpy
53
+ import torch
54
+
55
+ random.seed(seed)
56
+ numpy.random.seed(seed)
57
+ torch.manual_seed(seed)
58
+ torch.cuda.manual_seed(seed)
59
+ torch.cuda.manual_seed_all(seed)
60
+
61
+ torch.backends.cudnn.deterministic = True
62
+ torch.backends.cudnn.benchmark = False
63
+
64
+
65
+ def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"):
66
+ import glob
67
+ import os
68
+
69
+ from zoedepth.models.model_io import load_wts
70
+
71
+ if hasattr(config, "checkpoint"):
72
+ checkpoint = config.checkpoint
73
+ elif hasattr(config, "ckpt_pattern"):
74
+ pattern = config.ckpt_pattern
75
+ matches = glob.glob(os.path.join(
76
+ checkpoint_dir, f"*{pattern}*{ckpt_type}*"))
77
+ if not (len(matches) > 0):
78
+ raise ValueError(f"No matches found for the pattern {pattern}")
79
+
80
+ checkpoint = matches[0]
81
+
82
+ else:
83
+ return model
84
+ model = load_wts(model, checkpoint)
85
+ print("Loaded weights from {0}".format(checkpoint))
86
+ return model
87
+
88
+
89
+ def main_worker(gpu, ngpus_per_node, config):
90
+ try:
91
+ fix_random_seed(43)
92
+
93
+ config.gpu = gpu
94
+
95
+ model = build_model(config)
96
+
97
+ # print(model)
98
+
99
+ model = load_ckpt(config, model)
100
+ model = parallelize(config, model)
101
+
102
+ total_params = f"{round(count_parameters(model)/1e6,2)}M"
103
+ config.total_params = total_params
104
+ print(f"Total parameters : {total_params}")
105
+
106
+ train_loader = MixedNYUKITTI(config, "train").data
107
+ test_loader = MixedNYUKITTI(config, "online_eval").data
108
+
109
+ trainer = get_trainer(config)(
110
+ config, model, train_loader, test_loader, device=config.gpu)
111
+
112
+ trainer.train()
113
+ finally:
114
+ import wandb
115
+ wandb.finish()
116
+
117
+
118
+ if __name__ == '__main__':
119
+ mp.set_start_method('forkserver')
120
+
121
+ parser = argparse.ArgumentParser()
122
+ parser.add_argument("-m", "--model", type=str, default="synunet")
123
+ parser.add_argument("-d", "--dataset", type=str, default='mix')
124
+ parser.add_argument("--trainer", type=str, default=None)
125
+
126
+ args, unknown_args = parser.parse_known_args()
127
+ overwrite_kwargs = parse_unknown(unknown_args)
128
+
129
+ overwrite_kwargs["model"] = args.model
130
+ if args.trainer is not None:
131
+ overwrite_kwargs["trainer"] = args.trainer
132
+
133
+ config = get_config(args.model, "train", args.dataset, **overwrite_kwargs)
134
+ # git_commit()
135
+ if config.use_shared_dict:
136
+ shared_dict = mp.Manager().dict()
137
+ else:
138
+ shared_dict = None
139
+ config.shared_dict = shared_dict
140
+
141
+ config.batch_size = config.bs
142
+ config.mode = 'train'
143
+ if config.root != "." and not os.path.isdir(config.root):
144
+ os.makedirs(config.root)
145
+
146
+ try:
147
+ node_str = os.environ['SLURM_JOB_NODELIST'].replace(
148
+ '[', '').replace(']', '')
149
+ nodes = node_str.split(',')
150
+
151
+ config.world_size = len(nodes)
152
+ config.rank = int(os.environ['SLURM_PROCID'])
153
+ # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints"
154
+
155
+ except KeyError as e:
156
+ # We are NOT using SLURM
157
+ config.world_size = 1
158
+ config.rank = 0
159
+ nodes = ["127.0.0.1"]
160
+
161
+ if config.distributed:
162
+
163
+ print(config.rank)
164
+ port = np.random.randint(15000, 15025)
165
+ config.dist_url = 'tcp://{}:{}'.format(nodes[0], port)
166
+ print(config.dist_url)
167
+ config.dist_backend = 'nccl'
168
+ config.gpu = None
169
+
170
+ ngpus_per_node = torch.cuda.device_count()
171
+ config.num_workers = config.workers
172
+ config.ngpus_per_node = ngpus_per_node
173
+ print("Config:")
174
+ pprint(config)
175
+ if config.distributed:
176
+ config.world_size = ngpus_per_node * config.world_size
177
+ mp.spawn(main_worker, nprocs=ngpus_per_node,
178
+ args=(ngpus_per_node, config))
179
+ else:
180
+ if ngpus_per_node == 1:
181
+ config.gpu = 0
182
+ main_worker(config.gpu, ngpus_per_node, config)
metric_depth/train_mono.py ADDED
@@ -0,0 +1,176 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from zoedepth.utils.misc import count_parameters, parallelize
26
+ from zoedepth.utils.config import get_config
27
+ from zoedepth.utils.arg_utils import parse_unknown
28
+ from zoedepth.trainers.builder import get_trainer
29
+ from zoedepth.models.builder import build_model
30
+ from zoedepth.data.data_mono import DepthDataLoader
31
+ import torch.utils.data.distributed
32
+ import torch.multiprocessing as mp
33
+ import torch
34
+ import numpy as np
35
+ from pprint import pprint
36
+ import argparse
37
+ import os
38
+
39
+ os.environ["PYOPENGL_PLATFORM"] = "egl"
40
+ os.environ["WANDB_START_METHOD"] = "thread"
41
+
42
+
43
+ def fix_random_seed(seed: int):
44
+ import random
45
+
46
+ import numpy
47
+ import torch
48
+
49
+ random.seed(seed)
50
+ numpy.random.seed(seed)
51
+ torch.manual_seed(seed)
52
+ torch.cuda.manual_seed(seed)
53
+ torch.cuda.manual_seed_all(seed)
54
+
55
+ torch.backends.cudnn.deterministic = True
56
+ torch.backends.cudnn.benchmark = True
57
+
58
+
59
+ def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"):
60
+ import glob
61
+ import os
62
+
63
+ from zoedepth.models.model_io import load_wts
64
+
65
+ if hasattr(config, "checkpoint"):
66
+ checkpoint = config.checkpoint
67
+ elif hasattr(config, "ckpt_pattern"):
68
+ pattern = config.ckpt_pattern
69
+ matches = glob.glob(os.path.join(
70
+ checkpoint_dir, f"*{pattern}*{ckpt_type}*"))
71
+ if not (len(matches) > 0):
72
+ raise ValueError(f"No matches found for the pattern {pattern}")
73
+
74
+ checkpoint = matches[0]
75
+
76
+ else:
77
+ return model
78
+ model = load_wts(model, checkpoint)
79
+ print("Loaded weights from {0}".format(checkpoint))
80
+ return model
81
+
82
+
83
+ def main_worker(gpu, ngpus_per_node, config):
84
+ try:
85
+ seed = config.seed if 'seed' in config and config.seed else 43
86
+ fix_random_seed(seed)
87
+
88
+ config.gpu = gpu
89
+
90
+ model = build_model(config)
91
+ # print(model)
92
+
93
+ model = load_ckpt(config, model)
94
+ model = parallelize(config, model)
95
+
96
+ total_params = f"{round(count_parameters(model)/1e6,2)}M"
97
+ config.total_params = total_params
98
+ print(f"Total parameters : {total_params}")
99
+
100
+ train_loader = DepthDataLoader(config, "train").data
101
+ test_loader = DepthDataLoader(config, "online_eval").data
102
+
103
+ trainer = get_trainer(config)(
104
+ config, model, train_loader, test_loader, device=config.gpu)
105
+
106
+ trainer.train()
107
+ finally:
108
+ import wandb
109
+ wandb.finish()
110
+
111
+
112
+ if __name__ == '__main__':
113
+ mp.set_start_method('forkserver')
114
+
115
+ parser = argparse.ArgumentParser()
116
+ parser.add_argument("-m", "--model", type=str, default="synunet")
117
+ parser.add_argument("-d", "--dataset", type=str, default='nyu')
118
+ parser.add_argument("--trainer", type=str, default=None)
119
+
120
+ args, unknown_args = parser.parse_known_args()
121
+ overwrite_kwargs = parse_unknown(unknown_args)
122
+
123
+ overwrite_kwargs["model"] = args.model
124
+ if args.trainer is not None:
125
+ overwrite_kwargs["trainer"] = args.trainer
126
+
127
+ config = get_config(args.model, "train", args.dataset, **overwrite_kwargs)
128
+ # git_commit()
129
+ if config.use_shared_dict:
130
+ shared_dict = mp.Manager().dict()
131
+ else:
132
+ shared_dict = None
133
+ config.shared_dict = shared_dict
134
+
135
+ config.batch_size = config.bs
136
+ config.mode = 'train'
137
+ if config.root != "." and not os.path.isdir(config.root):
138
+ os.makedirs(config.root)
139
+
140
+ try:
141
+ node_str = os.environ['SLURM_JOB_NODELIST'].replace(
142
+ '[', '').replace(']', '')
143
+ nodes = node_str.split(',')
144
+
145
+ config.world_size = len(nodes)
146
+ config.rank = int(os.environ['SLURM_PROCID'])
147
+ # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints"
148
+
149
+ except KeyError as e:
150
+ # We are NOT using SLURM
151
+ config.world_size = 1
152
+ config.rank = 0
153
+ nodes = ["127.0.0.1"]
154
+
155
+ if config.distributed:
156
+
157
+ print(config.rank)
158
+ port = np.random.randint(15000, 15025)
159
+ config.dist_url = 'tcp://{}:{}'.format(nodes[0], port)
160
+ print(config.dist_url)
161
+ config.dist_backend = 'nccl'
162
+ config.gpu = None
163
+
164
+ ngpus_per_node = torch.cuda.device_count()
165
+ config.num_workers = config.workers
166
+ config.ngpus_per_node = ngpus_per_node
167
+ print("Config:")
168
+ pprint(config)
169
+ if config.distributed:
170
+ config.world_size = ngpus_per_node * config.world_size
171
+ mp.spawn(main_worker, nprocs=ngpus_per_node,
172
+ args=(ngpus_per_node, config))
173
+ else:
174
+ if ngpus_per_node == 1:
175
+ config.gpu = 0
176
+ main_worker(config.gpu, ngpus_per_node, config)
metric_depth/train_test_inputs/kitti_eigen_test_files_with_gt.txt ADDED
The diff for this file is too large to render. See raw diff
 
metric_depth/train_test_inputs/kitti_eigen_train_files_with_gt.txt ADDED
The diff for this file is too large to render. See raw diff
 
metric_depth/train_test_inputs/nyudepthv2_test_files_with_gt.txt ADDED
@@ -0,0 +1,654 @@
1
+ bathroom/rgb_00045.jpg bathroom/sync_depth_00045.png 518.8579
2
+ bathroom/rgb_00046.jpg bathroom/sync_depth_00046.png 518.8579
3
+ bathroom/rgb_00507.jpg bathroom/sync_depth_00507.png 518.8579
4
+ bathroom/rgb_00508.jpg bathroom/sync_depth_00508.png 518.8579
5
+ bathroom/rgb_00509.jpg bathroom/sync_depth_00509.png 518.8579
6
+ bathroom/rgb_00510.jpg bathroom/sync_depth_00510.png 518.8579
7
+ bathroom/rgb_00511.jpg bathroom/sync_depth_00511.png 518.8579
8
+ bathroom/rgb_00512.jpg bathroom/sync_depth_00512.png 518.8579
9
+ bathroom/rgb_00649.jpg bathroom/sync_depth_00649.png 518.8579
10
+ bathroom/rgb_00650.jpg bathroom/sync_depth_00650.png 518.8579
11
+ bathroom/rgb_00655.jpg bathroom/sync_depth_00655.png 518.8579
12
+ bathroom/rgb_00656.jpg bathroom/sync_depth_00656.png 518.8579
13
+ bathroom/rgb_00657.jpg bathroom/sync_depth_00657.png 518.8579
14
+ bathroom/rgb_00662.jpg bathroom/sync_depth_00662.png 518.8579
15
+ bathroom/rgb_00663.jpg bathroom/sync_depth_00663.png 518.8579
16
+ bathroom/rgb_00667.jpg bathroom/sync_depth_00667.png 518.8579
17
+ bathroom/rgb_00668.jpg bathroom/sync_depth_00668.png 518.8579
18
+ bathroom/rgb_00670.jpg bathroom/sync_depth_00670.png 518.8579
19
+ bathroom/rgb_00671.jpg bathroom/sync_depth_00671.png 518.8579
20
+ bathroom/rgb_00672.jpg bathroom/sync_depth_00672.png 518.8579
21
+ bathroom/rgb_00675.jpg bathroom/sync_depth_00675.png 518.8579
22
+ bathroom/rgb_00676.jpg bathroom/sync_depth_00676.png 518.8579
23
+ bathroom/rgb_00677.jpg bathroom/sync_depth_00677.png 518.8579
24
+ bathroom/rgb_00678.jpg bathroom/sync_depth_00678.png 518.8579
25
+ bathroom/rgb_00679.jpg bathroom/sync_depth_00679.png 518.8579
26
+ bathroom/rgb_00680.jpg bathroom/sync_depth_00680.png 518.8579
27
+ bathroom/rgb_00685.jpg bathroom/sync_depth_00685.png 518.8579
28
+ bathroom/rgb_00686.jpg bathroom/sync_depth_00686.png 518.8579
29
+ bathroom/rgb_00687.jpg bathroom/sync_depth_00687.png 518.8579
30
+ bathroom/rgb_00688.jpg bathroom/sync_depth_00688.png 518.8579
31
+ bathroom/rgb_00689.jpg bathroom/sync_depth_00689.png 518.8579
32
+ bathroom/rgb_00692.jpg bathroom/sync_depth_00692.png 518.8579
33
+ bathroom/rgb_00693.jpg bathroom/sync_depth_00693.png 518.8579
34
+ bathroom/rgb_00696.jpg bathroom/sync_depth_00696.png 518.8579
35
+ bathroom/rgb_00669.jpg bathroom/sync_depth_00669.png 518.8579
36
+ bathroom/rgb_00697.jpg bathroom/sync_depth_00697.png 518.8579
37
+ bathroom/rgb_00698.jpg bathroom/sync_depth_00698.png 518.8579
38
+ bathroom/rgb_00705.jpg bathroom/sync_depth_00705.png 518.8579
39
+ bathroom/rgb_00706.jpg bathroom/sync_depth_00706.png 518.8579
40
+ bathroom/rgb_00707.jpg bathroom/sync_depth_00707.png 518.8579
41
+ bathroom/rgb_00708.jpg bathroom/sync_depth_00708.png 518.8579
42
+ bathroom/rgb_00709.jpg bathroom/sync_depth_00709.png 518.8579
43
+ bathroom/rgb_00710.jpg bathroom/sync_depth_00710.png 518.8579
44
+ bathroom/rgb_00711.jpg bathroom/sync_depth_00711.png 518.8579
45
+ bathroom/rgb_00712.jpg bathroom/sync_depth_00712.png 518.8579
46
+ bathroom/rgb_00716.jpg bathroom/sync_depth_00716.png 518.8579
47
+ bathroom/rgb_00717.jpg bathroom/sync_depth_00717.png 518.8579
48
+ bathroom/rgb_00723.jpg bathroom/sync_depth_00723.png 518.8579
49
+ bathroom/rgb_00724.jpg bathroom/sync_depth_00724.png 518.8579
50
+ bathroom/rgb_00725.jpg bathroom/sync_depth_00725.png 518.8579
51
+ bathroom/rgb_00726.jpg bathroom/sync_depth_00726.png 518.8579
52
+ bathroom/rgb_00727.jpg bathroom/sync_depth_00727.png 518.8579
53
+ bathroom/rgb_00730.jpg bathroom/sync_depth_00730.png 518.8579
54
+ bathroom/rgb_00731.jpg bathroom/sync_depth_00731.png 518.8579
55
+ bathroom/rgb_00732.jpg bathroom/sync_depth_00732.png 518.8579
56
+ bathroom/rgb_00733.jpg bathroom/sync_depth_00733.png 518.8579
57
+ bathroom/rgb_00742.jpg bathroom/sync_depth_00742.png 518.8579
58
+ bathroom/rgb_00743.jpg bathroom/sync_depth_00743.png 518.8579
59
+ bedroom/rgb_00055.jpg bedroom/sync_depth_00055.png 518.8579
60
+ bedroom/rgb_00056.jpg bedroom/sync_depth_00056.png 518.8579
61
+ bedroom/rgb_00058.jpg bedroom/sync_depth_00058.png 518.8579
62
+ bedroom/rgb_00059.jpg bedroom/sync_depth_00059.png 518.8579
63
+ bedroom/rgb_00060.jpg bedroom/sync_depth_00060.png 518.8579
64
+ bedroom/rgb_00061.jpg bedroom/sync_depth_00061.png 518.8579
65
+ bedroom/rgb_00062.jpg bedroom/sync_depth_00062.png 518.8579
66
+ bedroom/rgb_00075.jpg bedroom/sync_depth_00075.png 518.8579
67
+ bedroom/rgb_00076.jpg bedroom/sync_depth_00076.png 518.8579
68
+ bedroom/rgb_00077.jpg bedroom/sync_depth_00077.png 518.8579
69
+ bedroom/rgb_00078.jpg bedroom/sync_depth_00078.png 518.8579
70
+ bedroom/rgb_00170.jpg bedroom/sync_depth_00170.png 518.8579
71
+ bedroom/rgb_00171.jpg bedroom/sync_depth_00171.png 518.8579
72
+ bedroom/rgb_00172.jpg bedroom/sync_depth_00172.png 518.8579
73
+ bedroom/rgb_00173.jpg bedroom/sync_depth_00173.png 518.8579
74
+ bedroom/rgb_00174.jpg bedroom/sync_depth_00174.png 518.8579
75
+ bedroom/rgb_00175.jpg bedroom/sync_depth_00175.png 518.8579
76
+ bedroom/rgb_00180.jpg bedroom/sync_depth_00180.png 518.8579
77
+ bedroom/rgb_00181.jpg bedroom/sync_depth_00181.png 518.8579
78
+ bedroom/rgb_00182.jpg bedroom/sync_depth_00182.png 518.8579
79
+ bedroom/rgb_00183.jpg bedroom/sync_depth_00183.png 518.8579
80
+ bedroom/rgb_00184.jpg bedroom/sync_depth_00184.png 518.8579
81
+ bedroom/rgb_00185.jpg bedroom/sync_depth_00185.png 518.8579
82
+ bedroom/rgb_00186.jpg bedroom/sync_depth_00186.png 518.8579
83
+ bedroom/rgb_00187.jpg bedroom/sync_depth_00187.png 518.8579
84
+ bedroom/rgb_00188.jpg bedroom/sync_depth_00188.png 518.8579
85
+ bedroom/rgb_00189.jpg bedroom/sync_depth_00189.png 518.8579
86
+ bedroom/rgb_00190.jpg bedroom/sync_depth_00190.png 518.8579
87
+ bedroom/rgb_00191.jpg bedroom/sync_depth_00191.png 518.8579
88
+ bedroom/rgb_00192.jpg bedroom/sync_depth_00192.png 518.8579
89
+ bedroom/rgb_00219.jpg bedroom/sync_depth_00219.png 518.8579
90
+ bedroom/rgb_00220.jpg bedroom/sync_depth_00220.png 518.8579
91
+ bedroom/rgb_00221.jpg bedroom/sync_depth_00221.png 518.8579
92
+ bedroom/rgb_00279.jpg bedroom/sync_depth_00279.png 518.8579
93
+ bedroom/rgb_00179.jpg bedroom/sync_depth_00179.png 518.8579
94
+ bedroom/rgb_00280.jpg bedroom/sync_depth_00280.png 518.8579
95
+ bedroom/rgb_00536.jpg bedroom/sync_depth_00536.png 518.8579
96
+ bedroom/rgb_00960.jpg bedroom/sync_depth_00960.png 518.8579
97
+ bedroom/rgb_01000.jpg bedroom/sync_depth_01000.png 518.8579
98
+ bedroom/rgb_01052.jpg bedroom/sync_depth_01052.png 518.8579
99
+ bedroom/rgb_01092.jpg bedroom/sync_depth_01092.png 518.8579
100
+ bedroom/rgb_01122.jpg bedroom/sync_depth_01122.png 518.8579
101
+ bedroom/rgb_01150.jpg bedroom/sync_depth_01150.png 518.8579
102
+ bedroom/rgb_00281.jpg bedroom/sync_depth_00281.png 518.8579
103
+ bedroom/rgb_00282.jpg bedroom/sync_depth_00282.png 518.8579
104
+ bedroom/rgb_00514.jpg bedroom/sync_depth_00514.png 518.8579
105
+ bedroom/rgb_00515.jpg bedroom/sync_depth_00515.png 518.8579
106
+ bedroom/rgb_00516.jpg bedroom/sync_depth_00516.png 518.8579
107
+ bedroom/rgb_00517.jpg bedroom/sync_depth_00517.png 518.8579
108
+ bedroom/rgb_00518.jpg bedroom/sync_depth_00518.png 518.8579
109
+ bedroom/rgb_00519.jpg bedroom/sync_depth_00519.png 518.8579
110
+ bedroom/rgb_00520.jpg bedroom/sync_depth_00520.png 518.8579
111
+ bedroom/rgb_00521.jpg bedroom/sync_depth_00521.png 518.8579
112
+ bedroom/rgb_00522.jpg bedroom/sync_depth_00522.png 518.8579
113
+ bedroom/rgb_00523.jpg bedroom/sync_depth_00523.png 518.8579
114
+ bedroom/rgb_00524.jpg bedroom/sync_depth_00524.png 518.8579
115
+ bedroom/rgb_00525.jpg bedroom/sync_depth_00525.png 518.8579
116
+ bedroom/rgb_00530.jpg bedroom/sync_depth_00530.png 518.8579
117
+ bedroom/rgb_00531.jpg bedroom/sync_depth_00531.png 518.8579
118
+ bedroom/rgb_00532.jpg bedroom/sync_depth_00532.png 518.8579
119
+ bedroom/rgb_00537.jpg bedroom/sync_depth_00537.png 518.8579
120
+ bedroom/rgb_00538.jpg bedroom/sync_depth_00538.png 518.8579
121
+ bedroom/rgb_00916.jpg bedroom/sync_depth_00916.png 518.8579
122
+ bedroom/rgb_00917.jpg bedroom/sync_depth_00917.png 518.8579
123
+ bedroom/rgb_00918.jpg bedroom/sync_depth_00918.png 518.8579
124
+ bedroom/rgb_00925.jpg bedroom/sync_depth_00925.png 518.8579
125
+ bedroom/rgb_00926.jpg bedroom/sync_depth_00926.png 518.8579
126
+ bedroom/rgb_00927.jpg bedroom/sync_depth_00927.png 518.8579
127
+ bedroom/rgb_00931.jpg bedroom/sync_depth_00931.png 518.8579
128
+ bedroom/rgb_00932.jpg bedroom/sync_depth_00932.png 518.8579
129
+ bedroom/rgb_00933.jpg bedroom/sync_depth_00933.png 518.8579
130
+ bedroom/rgb_00934.jpg bedroom/sync_depth_00934.png 518.8579
131
+ bedroom/rgb_00944.jpg bedroom/sync_depth_00944.png 518.8579
132
+ bedroom/rgb_00945.jpg bedroom/sync_depth_00945.png 518.8579
133
+ bedroom/rgb_00946.jpg bedroom/sync_depth_00946.png 518.8579
134
+ bedroom/rgb_00958.jpg bedroom/sync_depth_00958.png 518.8579
135
+ bedroom/rgb_00959.jpg bedroom/sync_depth_00959.png 518.8579
136
+ bedroom/rgb_00961.jpg bedroom/sync_depth_00961.png 518.8579
137
+ bedroom/rgb_00964.jpg bedroom/sync_depth_00964.png 518.8579
138
+ bedroom/rgb_00965.jpg bedroom/sync_depth_00965.png 518.8579
139
+ bedroom/rgb_00966.jpg bedroom/sync_depth_00966.png 518.8579
140
+ bedroom/rgb_00969.jpg bedroom/sync_depth_00969.png 518.8579
141
+ bedroom/rgb_00970.jpg bedroom/sync_depth_00970.png 518.8579
142
+ bedroom/rgb_00971.jpg bedroom/sync_depth_00971.png 518.8579
143
+ bedroom/rgb_00972.jpg bedroom/sync_depth_00972.png 518.8579
144
+ bedroom/rgb_00973.jpg bedroom/sync_depth_00973.png 518.8579
145
+ bedroom/rgb_00974.jpg bedroom/sync_depth_00974.png 518.8579
146
+ bedroom/rgb_00975.jpg bedroom/sync_depth_00975.png 518.8579
147
+ bedroom/rgb_00976.jpg bedroom/sync_depth_00976.png 518.8579
148
+ bedroom/rgb_00990.jpg bedroom/sync_depth_00990.png 518.8579
149
+ bedroom/rgb_00991.jpg bedroom/sync_depth_00991.png 518.8579
150
+ bedroom/rgb_00992.jpg bedroom/sync_depth_00992.png 518.8579
151
+ bedroom/rgb_00993.jpg bedroom/sync_depth_00993.png 518.8579
152
+ bedroom/rgb_00994.jpg bedroom/sync_depth_00994.png 518.8579
153
+ bedroom/rgb_01001.jpg bedroom/sync_depth_01001.png 518.8579
154
+ bedroom/rgb_01002.jpg bedroom/sync_depth_01002.png 518.8579
155
+ bedroom/rgb_01003.jpg bedroom/sync_depth_01003.png 518.8579
156
+ bedroom/rgb_01009.jpg bedroom/sync_depth_01009.png 518.8579
157
+ bedroom/rgb_01010.jpg bedroom/sync_depth_01010.png 518.8579
158
+ bedroom/rgb_01011.jpg bedroom/sync_depth_01011.png 518.8579
159
+ bedroom/rgb_01020.jpg bedroom/sync_depth_01020.png 518.8579
160
+ bedroom/rgb_01021.jpg bedroom/sync_depth_01021.png 518.8579
161
+ bedroom/rgb_01022.jpg bedroom/sync_depth_01022.png 518.8579
162
+ bedroom/rgb_01031.jpg bedroom/sync_depth_01031.png 518.8579
163
+ bedroom/rgb_01032.jpg bedroom/sync_depth_01032.png 518.8579
164
+ bedroom/rgb_01033.jpg bedroom/sync_depth_01033.png 518.8579
165
+ bedroom/rgb_01037.jpg bedroom/sync_depth_01037.png 518.8579
166
+ bedroom/rgb_01038.jpg bedroom/sync_depth_01038.png 518.8579
167
+ bedroom/rgb_01047.jpg bedroom/sync_depth_01047.png 518.8579
168
+ bedroom/rgb_01048.jpg bedroom/sync_depth_01048.png 518.8579
169
+ bedroom/rgb_01051.jpg bedroom/sync_depth_01051.png 518.8579
170
+ bedroom/rgb_01056.jpg bedroom/sync_depth_01056.png 518.8579
171
+ bedroom/rgb_01057.jpg bedroom/sync_depth_01057.png 518.8579
172
+ bedroom/rgb_01074.jpg bedroom/sync_depth_01074.png 518.8579
173
+ bedroom/rgb_01075.jpg bedroom/sync_depth_01075.png 518.8579
174
+ bedroom/rgb_01076.jpg bedroom/sync_depth_01076.png 518.8579
175
+ bedroom/rgb_01077.jpg bedroom/sync_depth_01077.png 518.8579
176
+ bedroom/rgb_01078.jpg bedroom/sync_depth_01078.png 518.8579
177
+ bedroom/rgb_01079.jpg bedroom/sync_depth_01079.png 518.8579
178
+ bedroom/rgb_01080.jpg bedroom/sync_depth_01080.png 518.8579
179
+ bedroom/rgb_01081.jpg bedroom/sync_depth_01081.png 518.8579
180
+ bedroom/rgb_01082.jpg bedroom/sync_depth_01082.png 518.8579
181
+ bedroom/rgb_01083.jpg bedroom/sync_depth_01083.png 518.8579
182
+ bedroom/rgb_01087.jpg bedroom/sync_depth_01087.png 518.8579
183
+ bedroom/rgb_01088.jpg bedroom/sync_depth_01088.png 518.8579
184
+ bedroom/rgb_01089.jpg bedroom/sync_depth_01089.png 518.8579
185
+ bedroom/rgb_01090.jpg bedroom/sync_depth_01090.png 518.8579
186
+ bedroom/rgb_01091.jpg bedroom/sync_depth_01091.png 518.8579
187
+ bedroom/rgb_01093.jpg bedroom/sync_depth_01093.png 518.8579
188
+ bedroom/rgb_01094.jpg bedroom/sync_depth_01094.png 518.8579
189
+ bedroom/rgb_01095.jpg bedroom/sync_depth_01095.png 518.8579
190
+ bedroom/rgb_01097.jpg bedroom/sync_depth_01097.png 518.8579
191
+ bedroom/rgb_01098.jpg bedroom/sync_depth_01098.png 518.8579
192
+ bedroom/rgb_01099.jpg bedroom/sync_depth_01099.png 518.8579
193
+ bedroom/rgb_01100.jpg bedroom/sync_depth_01100.png 518.8579
194
+ bedroom/rgb_01101.jpg bedroom/sync_depth_01101.png 518.8579
195
+ bedroom/rgb_01102.jpg bedroom/sync_depth_01102.png 518.8579
196
+ bedroom/rgb_01103.jpg bedroom/sync_depth_01103.png 518.8579
197
+ bedroom/rgb_01105.jpg bedroom/sync_depth_01105.png 518.8579
198
+ bedroom/rgb_01106.jpg bedroom/sync_depth_01106.png 518.8579
199
+ bedroom/rgb_01107.jpg bedroom/sync_depth_01107.png 518.8579
200
+ bedroom/rgb_01108.jpg bedroom/sync_depth_01108.png 518.8579
201
+ bedroom/rgb_01116.jpg bedroom/sync_depth_01116.png 518.8579
202
+ bedroom/rgb_01117.jpg bedroom/sync_depth_01117.png 518.8579
203
+ bedroom/rgb_01118.jpg bedroom/sync_depth_01118.png 518.8579
204
+ bedroom/rgb_01123.jpg bedroom/sync_depth_01123.png 518.8579
205
+ bedroom/rgb_01124.jpg bedroom/sync_depth_01124.png 518.8579
206
+ bedroom/rgb_01125.jpg bedroom/sync_depth_01125.png 518.8579
207
+ bedroom/rgb_01126.jpg bedroom/sync_depth_01126.png 518.8579
208
+ bedroom/rgb_01127.jpg bedroom/sync_depth_01127.png 518.8579
209
+ bedroom/rgb_01128.jpg bedroom/sync_depth_01128.png 518.8579
210
+ bedroom/rgb_01129.jpg bedroom/sync_depth_01129.png 518.8579
211
+ bedroom/rgb_01130.jpg bedroom/sync_depth_01130.png 518.8579
212
+ bedroom/rgb_01134.jpg bedroom/sync_depth_01134.png 518.8579
213
+ bedroom/rgb_01135.jpg bedroom/sync_depth_01135.png 518.8579
214
+ bedroom/rgb_01143.jpg bedroom/sync_depth_01143.png 518.8579
215
+ bedroom/rgb_01144.jpg bedroom/sync_depth_01144.png 518.8579
216
+ bedroom/rgb_01145.jpg bedroom/sync_depth_01145.png 518.8579
217
+ bedroom/rgb_01146.jpg bedroom/sync_depth_01146.png 518.8579
218
+ bedroom/rgb_01147.jpg bedroom/sync_depth_01147.png 518.8579
219
+ bedroom/rgb_01148.jpg bedroom/sync_depth_01148.png 518.8579
220
+ bedroom/rgb_01149.jpg bedroom/sync_depth_01149.png 518.8579
221
+ bedroom/rgb_01151.jpg bedroom/sync_depth_01151.png 518.8579
222
+ bedroom/rgb_01152.jpg bedroom/sync_depth_01152.png 518.8579
223
+ bedroom/rgb_01153.jpg bedroom/sync_depth_01153.png 518.8579
224
+ bedroom/rgb_01154.jpg bedroom/sync_depth_01154.png 518.8579
225
+ bedroom/rgb_01155.jpg bedroom/sync_depth_01155.png 518.8579
226
+ bedroom/rgb_01156.jpg bedroom/sync_depth_01156.png 518.8579
227
+ bedroom/rgb_01157.jpg bedroom/sync_depth_01157.png 518.8579
228
+ bedroom/rgb_01161.jpg bedroom/sync_depth_01161.png 518.8579
229
+ bedroom/rgb_01162.jpg bedroom/sync_depth_01162.png 518.8579
230
+ bedroom/rgb_01163.jpg bedroom/sync_depth_01163.png 518.8579
231
+ bedroom/rgb_01164.jpg bedroom/sync_depth_01164.png 518.8579
232
+ bedroom/rgb_01165.jpg bedroom/sync_depth_01165.png 518.8579
233
+ bedroom/rgb_01166.jpg bedroom/sync_depth_01166.png 518.8579
234
+ bedroom/rgb_01169.jpg bedroom/sync_depth_01169.png 518.8579
235
+ bedroom/rgb_01170.jpg bedroom/sync_depth_01170.png 518.8579
236
+ bedroom/rgb_01173.jpg bedroom/sync_depth_01173.png 518.8579
237
+ bedroom/rgb_01174.jpg bedroom/sync_depth_01174.png 518.8579
238
+ bedroom/rgb_01175.jpg bedroom/sync_depth_01175.png 518.8579
239
+ bedroom/rgb_01178.jpg bedroom/sync_depth_01178.png 518.8579
240
+ bedroom/rgb_01179.jpg bedroom/sync_depth_01179.png 518.8579
241
+ bedroom/rgb_01180.jpg bedroom/sync_depth_01180.png 518.8579
242
+ bedroom/rgb_01181.jpg bedroom/sync_depth_01181.png 518.8579
243
+ bedroom/rgb_01182.jpg bedroom/sync_depth_01182.png 518.8579
244
+ bedroom/rgb_01183.jpg bedroom/sync_depth_01183.png 518.8579
245
+ bedroom/rgb_01191.jpg bedroom/sync_depth_01191.png 518.8579
246
+ bedroom/rgb_01192.jpg bedroom/sync_depth_01192.png 518.8579
247
+ bedroom/rgb_01193.jpg bedroom/sync_depth_01193.png 518.8579
248
+ bedroom/rgb_01194.jpg bedroom/sync_depth_01194.png 518.8579
249
+ bedroom/rgb_01195.jpg bedroom/sync_depth_01195.png 518.8579
250
+ bookstore/rgb_00083.jpg bookstore/sync_depth_00083.png 518.8579
251
+ bookstore/rgb_00084.jpg bookstore/sync_depth_00084.png 518.8579
252
+ bookstore/rgb_00085.jpg bookstore/sync_depth_00085.png 518.8579
253
+ bookstore/rgb_00086.jpg bookstore/sync_depth_00086.png 518.8579
254
+ bookstore/rgb_00087.jpg bookstore/sync_depth_00087.png 518.8579
255
+ bookstore/rgb_00088.jpg bookstore/sync_depth_00088.png 518.8579
256
+ bookstore/rgb_00089.jpg bookstore/sync_depth_00089.png 518.8579
257
+ bookstore/rgb_00090.jpg bookstore/sync_depth_00090.png 518.8579
258
+ bookstore/rgb_00116.jpg bookstore/sync_depth_00116.png 518.8579
259
+ bookstore/rgb_00117.jpg bookstore/sync_depth_00117.png 518.8579
260
+ bookstore/rgb_00118.jpg bookstore/sync_depth_00118.png 518.8579
261
+ classroom/rgb_00283.jpg classroom/sync_depth_00283.png 518.8579
262
+ classroom/rgb_00284.jpg classroom/sync_depth_00284.png 518.8579
263
+ classroom/rgb_00295.jpg classroom/sync_depth_00295.png 518.8579
264
+ classroom/rgb_00296.jpg classroom/sync_depth_00296.png 518.8579
265
+ classroom/rgb_00297.jpg classroom/sync_depth_00297.png 518.8579
266
+ classroom/rgb_00298.jpg classroom/sync_depth_00298.png 518.8579
267
+ classroom/rgb_00299.jpg classroom/sync_depth_00299.png 518.8579
268
+ classroom/rgb_00300.jpg classroom/sync_depth_00300.png 518.8579
269
+ classroom/rgb_00301.jpg classroom/sync_depth_00301.png 518.8579
270
+ classroom/rgb_00309.jpg classroom/sync_depth_00309.png 518.8579
271
+ classroom/rgb_00310.jpg classroom/sync_depth_00310.png 518.8579
272
+ classroom/rgb_00311.jpg classroom/sync_depth_00311.png 518.8579
273
+ classroom/rgb_00314.jpg classroom/sync_depth_00314.png 518.8579
274
+ classroom/rgb_00315.jpg classroom/sync_depth_00315.png 518.8579
275
+ classroom/rgb_00316.jpg classroom/sync_depth_00316.png 518.8579
276
+ classroom/rgb_00324.jpg classroom/sync_depth_00324.png 518.8579
277
+ classroom/rgb_00325.jpg classroom/sync_depth_00325.png 518.8579
278
+ classroom/rgb_00326.jpg classroom/sync_depth_00326.png 518.8579
279
+ classroom/rgb_00327.jpg classroom/sync_depth_00327.png 518.8579
280
+ classroom/rgb_00328.jpg classroom/sync_depth_00328.png 518.8579
281
+ classroom/rgb_00329.jpg classroom/sync_depth_00329.png 518.8579
282
+ classroom/rgb_00330.jpg classroom/sync_depth_00330.png 518.8579
283
+ classroom/rgb_00331.jpg classroom/sync_depth_00331.png 518.8579
284
+ computer_lab/rgb_00332.jpg computer_lab/sync_depth_00332.png 518.8579
285
+ computer_lab/rgb_00333.jpg computer_lab/sync_depth_00333.png 518.8579
286
+ computer_lab/rgb_00334.jpg computer_lab/sync_depth_00334.png 518.8579
287
+ dining_room/rgb_00548.jpg dining_room/sync_depth_00548.png 518.8579
288
+ dining_room/rgb_00549.jpg dining_room/sync_depth_00549.png 518.8579
289
+ dining_room/rgb_00550.jpg dining_room/sync_depth_00550.png 518.8579
290
+ dining_room/rgb_01346.jpg dining_room/sync_depth_01346.png 518.8579
291
+ dining_room/rgb_01347.jpg dining_room/sync_depth_01347.png 518.8579
292
+ dining_room/rgb_01348.jpg dining_room/sync_depth_01348.png 518.8579
293
+ dining_room/rgb_01352.jpg dining_room/sync_depth_01352.png 518.8579
294
+ dining_room/rgb_01353.jpg dining_room/sync_depth_01353.png 518.8579
295
+ dining_room/rgb_01354.jpg dining_room/sync_depth_01354.png 518.8579
296
+ dining_room/rgb_01355.jpg dining_room/sync_depth_01355.png 518.8579
297
+ dining_room/rgb_01363.jpg dining_room/sync_depth_01363.png 518.8579
298
+ dining_room/rgb_01364.jpg dining_room/sync_depth_01364.png 518.8579
299
+ dining_room/rgb_01367.jpg dining_room/sync_depth_01367.png 518.8579
300
+ dining_room/rgb_01368.jpg dining_room/sync_depth_01368.png 518.8579
301
+ dining_room/rgb_01383.jpg dining_room/sync_depth_01383.png 518.8579
302
+ dining_room/rgb_01384.jpg dining_room/sync_depth_01384.png 518.8579
303
+ dining_room/rgb_01385.jpg dining_room/sync_depth_01385.png 518.8579
304
+ dining_room/rgb_01387.jpg dining_room/sync_depth_01387.png 518.8579
305
+ dining_room/rgb_01388.jpg dining_room/sync_depth_01388.png 518.8579
306
+ dining_room/rgb_01389.jpg dining_room/sync_depth_01389.png 518.8579
307
+ dining_room/rgb_01390.jpg dining_room/sync_depth_01390.png 518.8579
308
+ dining_room/rgb_01393.jpg dining_room/sync_depth_01393.png 518.8579
309
+ dining_room/rgb_01394.jpg dining_room/sync_depth_01394.png 518.8579
310
+ dining_room/rgb_01395.jpg dining_room/sync_depth_01395.png 518.8579
311
+ dining_room/rgb_01396.jpg dining_room/sync_depth_01396.png 518.8579
312
+ dining_room/rgb_01397.jpg dining_room/sync_depth_01397.png 518.8579
313
+ dining_room/rgb_01398.jpg dining_room/sync_depth_01398.png 518.8579
314
+ dining_room/rgb_01399.jpg dining_room/sync_depth_01399.png 518.8579
315
+ dining_room/rgb_01400.jpg dining_room/sync_depth_01400.png 518.8579
316
+ dining_room/rgb_01406.jpg dining_room/sync_depth_01406.png 518.8579
317
+ dining_room/rgb_01407.jpg dining_room/sync_depth_01407.png 518.8579
318
+ dining_room/rgb_01408.jpg dining_room/sync_depth_01408.png 518.8579
319
+ dining_room/rgb_01409.jpg dining_room/sync_depth_01409.png 518.8579
320
+ dining_room/rgb_01410.jpg dining_room/sync_depth_01410.png 518.8579
321
+ dining_room/rgb_01386.jpg dining_room/sync_depth_01386.png 518.8579
322
+ dining_room/rgb_01411.jpg dining_room/sync_depth_01411.png 518.8579
323
+ dining_room/rgb_01412.jpg dining_room/sync_depth_01412.png 518.8579
324
+ dining_room/rgb_01413.jpg dining_room/sync_depth_01413.png 518.8579
325
+ dining_room/rgb_01420.jpg dining_room/sync_depth_01420.png 518.8579
326
+ dining_room/rgb_01421.jpg dining_room/sync_depth_01421.png 518.8579
327
+ dining_room/rgb_01422.jpg dining_room/sync_depth_01422.png 518.8579
328
+ dining_room/rgb_01423.jpg dining_room/sync_depth_01423.png 518.8579
329
+ dining_room/rgb_01429.jpg dining_room/sync_depth_01429.png 518.8579
330
+ dining_room/rgb_01430.jpg dining_room/sync_depth_01430.png 518.8579
331
+ dining_room/rgb_01431.jpg dining_room/sync_depth_01431.png 518.8579
332
+ dining_room/rgb_01432.jpg dining_room/sync_depth_01432.png 518.8579
333
+ dining_room/rgb_01440.jpg dining_room/sync_depth_01440.png 518.8579
334
+ dining_room/rgb_01441.jpg dining_room/sync_depth_01441.png 518.8579
335
+ dining_room/rgb_01442.jpg dining_room/sync_depth_01442.png 518.8579
336
+ dining_room/rgb_01443.jpg dining_room/sync_depth_01443.png 518.8579
337
+ dining_room/rgb_01444.jpg dining_room/sync_depth_01444.png 518.8579
338
+ dining_room/rgb_01445.jpg dining_room/sync_depth_01445.png 518.8579
339
+ dining_room/rgb_01446.jpg dining_room/sync_depth_01446.png 518.8579
340
+ dining_room/rgb_01447.jpg dining_room/sync_depth_01447.png 518.8579
341
+ dining_room/rgb_01448.jpg dining_room/sync_depth_01448.png 518.8579
342
+ foyer/rgb_00350.jpg foyer/sync_depth_00350.png 518.8579
343
+ foyer/rgb_00351.jpg foyer/sync_depth_00351.png 518.8579
344
+ home_office/rgb_00354.jpg home_office/sync_depth_00354.png 518.8579
345
+ home_office/rgb_00355.jpg home_office/sync_depth_00355.png 518.8579
346
+ home_office/rgb_00356.jpg home_office/sync_depth_00356.png 518.8579
347
+ home_office/rgb_00357.jpg home_office/sync_depth_00357.png 518.8579
348
+ home_office/rgb_00358.jpg home_office/sync_depth_00358.png 518.8579
349
+ home_office/rgb_00359.jpg home_office/sync_depth_00359.png 518.8579
350
+ home_office/rgb_00360.jpg home_office/sync_depth_00360.png 518.8579
351
+ home_office/rgb_00361.jpg home_office/sync_depth_00361.png 518.8579
352
+ home_office/rgb_00362.jpg home_office/sync_depth_00362.png 518.8579
353
+ home_office/rgb_00363.jpg home_office/sync_depth_00363.png 518.8579
354
+ home_office/rgb_00383.jpg home_office/sync_depth_00383.png 518.8579
355
+ home_office/rgb_00384.jpg home_office/sync_depth_00384.png 518.8579
356
+ home_office/rgb_00385.jpg home_office/sync_depth_00385.png 518.8579
357
+ home_office/rgb_00386.jpg home_office/sync_depth_00386.png 518.8579
358
+ home_office/rgb_00387.jpg home_office/sync_depth_00387.png 518.8579
359
+ home_office/rgb_00388.jpg home_office/sync_depth_00388.png 518.8579
360
+ home_office/rgb_00389.jpg home_office/sync_depth_00389.png 518.8579
361
+ home_office/rgb_00394.jpg home_office/sync_depth_00394.png 518.8579
362
+ home_office/rgb_00395.jpg home_office/sync_depth_00395.png 518.8579
363
+ home_office/rgb_00396.jpg home_office/sync_depth_00396.png 518.8579
364
+ home_office/rgb_00554.jpg home_office/sync_depth_00554.png 518.8579
365
+ home_office/rgb_00555.jpg home_office/sync_depth_00555.png 518.8579
366
+ home_office/rgb_00556.jpg home_office/sync_depth_00556.png 518.8579
367
+ home_office/rgb_00557.jpg home_office/sync_depth_00557.png 518.8579
368
+ kitchen/rgb_00000.jpg kitchen/sync_depth_00000.png 518.8579
369
+ kitchen/rgb_00001.jpg kitchen/sync_depth_00001.png 518.8579
370
+ kitchen/rgb_00124.jpg kitchen/sync_depth_00124.png 518.8579
371
+ kitchen/rgb_00125.jpg kitchen/sync_depth_00125.png 518.8579
372
+ kitchen/rgb_00126.jpg kitchen/sync_depth_00126.png 518.8579
373
+ kitchen/rgb_00127.jpg kitchen/sync_depth_00127.png 518.8579
374
+ kitchen/rgb_00128.jpg kitchen/sync_depth_00128.png 518.8579
375
+ kitchen/rgb_00130.jpg kitchen/sync_depth_00130.png 518.8579
376
+ kitchen/rgb_00131.jpg kitchen/sync_depth_00131.png 518.8579
377
+ kitchen/rgb_00132.jpg kitchen/sync_depth_00132.png 518.8579
378
+ kitchen/rgb_00133.jpg kitchen/sync_depth_00133.png 518.8579
379
+ kitchen/rgb_00136.jpg kitchen/sync_depth_00136.png 518.8579
380
+ kitchen/rgb_00193.jpg kitchen/sync_depth_00193.png 518.8579
381
+ kitchen/rgb_00194.jpg kitchen/sync_depth_00194.png 518.8579
382
+ kitchen/rgb_00195.jpg kitchen/sync_depth_00195.png 518.8579
383
+ kitchen/rgb_00196.jpg kitchen/sync_depth_00196.png 518.8579
384
+ kitchen/rgb_00197.jpg kitchen/sync_depth_00197.png 518.8579
385
+ kitchen/rgb_00199.jpg kitchen/sync_depth_00199.png 518.8579
386
+ kitchen/rgb_00200.jpg kitchen/sync_depth_00200.png 518.8579
387
+ kitchen/rgb_00201.jpg kitchen/sync_depth_00201.png 518.8579
388
+ kitchen/rgb_00249.jpg kitchen/sync_depth_00249.png 518.8579
389
+ kitchen/rgb_00558.jpg kitchen/sync_depth_00558.png 518.8579
390
+ kitchen/rgb_00559.jpg kitchen/sync_depth_00559.png 518.8579
391
+ kitchen/rgb_00560.jpg kitchen/sync_depth_00560.png 518.8579
392
+ kitchen/rgb_00561.jpg kitchen/sync_depth_00561.png 518.8579
393
+ kitchen/rgb_00562.jpg kitchen/sync_depth_00562.png 518.8579
394
+ kitchen/rgb_00563.jpg kitchen/sync_depth_00563.png 518.8579
395
+ kitchen/rgb_00564.jpg kitchen/sync_depth_00564.png 518.8579
396
+ kitchen/rgb_00565.jpg kitchen/sync_depth_00565.png 518.8579
397
+ kitchen/rgb_00566.jpg kitchen/sync_depth_00566.png 518.8579
398
+ kitchen/rgb_00567.jpg kitchen/sync_depth_00567.png 518.8579
399
+ kitchen/rgb_00568.jpg kitchen/sync_depth_00568.png 518.8579
400
+ kitchen/rgb_00569.jpg kitchen/sync_depth_00569.png 518.8579
401
+ kitchen/rgb_00570.jpg kitchen/sync_depth_00570.png 518.8579
402
+ kitchen/rgb_00198.jpg kitchen/sync_depth_00198.png 518.8579
403
+ kitchen/rgb_00758.jpg kitchen/sync_depth_00758.png 518.8579
404
+ kitchen/rgb_00776.jpg kitchen/sync_depth_00776.png 518.8579
405
+ kitchen/rgb_00811.jpg kitchen/sync_depth_00811.png 518.8579
406
+ kitchen/rgb_00844.jpg kitchen/sync_depth_00844.png 518.8579
407
+ kitchen/rgb_00759.jpg kitchen/sync_depth_00759.png 518.8579
408
+ kitchen/rgb_00760.jpg kitchen/sync_depth_00760.png 518.8579
409
+ kitchen/rgb_00761.jpg kitchen/sync_depth_00761.png 518.8579
410
+ kitchen/rgb_00762.jpg kitchen/sync_depth_00762.png 518.8579
411
+ kitchen/rgb_00763.jpg kitchen/sync_depth_00763.png 518.8579
412
+ kitchen/rgb_00764.jpg kitchen/sync_depth_00764.png 518.8579
413
+ kitchen/rgb_00765.jpg kitchen/sync_depth_00765.png 518.8579
414
+ kitchen/rgb_00766.jpg kitchen/sync_depth_00766.png 518.8579
415
+ kitchen/rgb_00767.jpg kitchen/sync_depth_00767.png 518.8579
416
+ kitchen/rgb_00768.jpg kitchen/sync_depth_00768.png 518.8579
417
+ kitchen/rgb_00769.jpg kitchen/sync_depth_00769.png 518.8579
418
+ kitchen/rgb_00770.jpg kitchen/sync_depth_00770.png 518.8579
419
+ kitchen/rgb_00771.jpg kitchen/sync_depth_00771.png 518.8579
420
+ kitchen/rgb_00772.jpg kitchen/sync_depth_00772.png 518.8579
421
+ kitchen/rgb_00773.jpg kitchen/sync_depth_00773.png 518.8579
422
+ kitchen/rgb_00774.jpg kitchen/sync_depth_00774.png 518.8579
423
+ kitchen/rgb_00775.jpg kitchen/sync_depth_00775.png 518.8579
424
+ kitchen/rgb_00777.jpg kitchen/sync_depth_00777.png 518.8579
425
+ kitchen/rgb_00778.jpg kitchen/sync_depth_00778.png 518.8579
426
+ kitchen/rgb_00779.jpg kitchen/sync_depth_00779.png 518.8579
427
+ kitchen/rgb_00780.jpg kitchen/sync_depth_00780.png 518.8579
428
+ kitchen/rgb_00781.jpg kitchen/sync_depth_00781.png 518.8579
429
+ kitchen/rgb_00782.jpg kitchen/sync_depth_00782.png 518.8579
430
+ kitchen/rgb_00783.jpg kitchen/sync_depth_00783.png 518.8579
431
+ kitchen/rgb_00784.jpg kitchen/sync_depth_00784.png 518.8579
432
+ kitchen/rgb_00785.jpg kitchen/sync_depth_00785.png 518.8579
433
+ kitchen/rgb_00786.jpg kitchen/sync_depth_00786.png 518.8579
434
+ kitchen/rgb_00799.jpg kitchen/sync_depth_00799.png 518.8579
435
+ kitchen/rgb_00800.jpg kitchen/sync_depth_00800.png 518.8579
436
+ kitchen/rgb_00801.jpg kitchen/sync_depth_00801.png 518.8579
437
+ kitchen/rgb_00802.jpg kitchen/sync_depth_00802.png 518.8579
438
+ kitchen/rgb_00803.jpg kitchen/sync_depth_00803.png 518.8579
439
+ kitchen/rgb_00809.jpg kitchen/sync_depth_00809.png 518.8579
440
+ kitchen/rgb_00810.jpg kitchen/sync_depth_00810.png 518.8579
441
+ kitchen/rgb_00812.jpg kitchen/sync_depth_00812.png 518.8579
442
+ kitchen/rgb_00813.jpg kitchen/sync_depth_00813.png 518.8579
443
+ kitchen/rgb_00820.jpg kitchen/sync_depth_00820.png 518.8579
444
+ kitchen/rgb_00821.jpg kitchen/sync_depth_00821.png 518.8579
445
+ kitchen/rgb_00822.jpg kitchen/sync_depth_00822.png 518.8579
446
+ kitchen/rgb_00832.jpg kitchen/sync_depth_00832.png 518.8579
447
+ kitchen/rgb_00833.jpg kitchen/sync_depth_00833.png 518.8579
448
+ kitchen/rgb_00834.jpg kitchen/sync_depth_00834.png 518.8579
449
+ kitchen/rgb_00835.jpg kitchen/sync_depth_00835.png 518.8579
450
+ kitchen/rgb_00836.jpg kitchen/sync_depth_00836.png 518.8579
451
+ kitchen/rgb_00837.jpg kitchen/sync_depth_00837.png 518.8579
452
+ kitchen/rgb_00838.jpg kitchen/sync_depth_00838.png 518.8579
453
+ kitchen/rgb_00839.jpg kitchen/sync_depth_00839.png 518.8579
454
+ kitchen/rgb_00840.jpg kitchen/sync_depth_00840.png 518.8579
455
+ kitchen/rgb_00841.jpg kitchen/sync_depth_00841.png 518.8579
456
+ kitchen/rgb_00842.jpg kitchen/sync_depth_00842.png 518.8579
457
+ kitchen/rgb_00843.jpg kitchen/sync_depth_00843.png 518.8579
458
+ kitchen/rgb_00845.jpg kitchen/sync_depth_00845.png 518.8579
459
+ kitchen/rgb_00849.jpg kitchen/sync_depth_00849.png 518.8579
460
+ kitchen/rgb_00850.jpg kitchen/sync_depth_00850.png 518.8579
461
+ kitchen/rgb_00851.jpg kitchen/sync_depth_00851.png 518.8579
462
+ kitchen/rgb_00856.jpg kitchen/sync_depth_00856.png 518.8579
463
+ kitchen/rgb_00857.jpg kitchen/sync_depth_00857.png 518.8579
464
+ kitchen/rgb_00858.jpg kitchen/sync_depth_00858.png 518.8579
465
+ kitchen/rgb_00859.jpg kitchen/sync_depth_00859.png 518.8579
466
+ kitchen/rgb_00860.jpg kitchen/sync_depth_00860.png 518.8579
467
+ kitchen/rgb_00861.jpg kitchen/sync_depth_00861.png 518.8579
468
+ kitchen/rgb_00868.jpg kitchen/sync_depth_00868.png 518.8579
469
+ kitchen/rgb_00869.jpg kitchen/sync_depth_00869.png 518.8579
470
+ kitchen/rgb_00870.jpg kitchen/sync_depth_00870.png 518.8579
471
+ kitchen/rgb_00905.jpg kitchen/sync_depth_00905.png 518.8579
472
+ kitchen/rgb_00906.jpg kitchen/sync_depth_00906.png 518.8579
473
+ kitchen/rgb_00907.jpg kitchen/sync_depth_00907.png 518.8579
474
+ living_room/rgb_00152.jpg living_room/sync_depth_00152.png 518.8579
475
+ living_room/rgb_00153.jpg living_room/sync_depth_00153.png 518.8579
476
+ living_room/rgb_00154.jpg living_room/sync_depth_00154.png 518.8579
477
+ living_room/rgb_00166.jpg living_room/sync_depth_00166.png 518.8579
478
+ living_room/rgb_00167.jpg living_room/sync_depth_00167.png 518.8579
479
+ living_room/rgb_00168.jpg living_room/sync_depth_00168.png 518.8579
480
+ living_room/rgb_00206.jpg living_room/sync_depth_00206.png 518.8579
481
+ living_room/rgb_00207.jpg living_room/sync_depth_00207.png 518.8579
482
+ living_room/rgb_00208.jpg living_room/sync_depth_00208.png 518.8579
483
+ living_room/rgb_00209.jpg living_room/sync_depth_00209.png 518.8579
484
+ living_room/rgb_00210.jpg living_room/sync_depth_00210.png 518.8579
485
+ living_room/rgb_00211.jpg living_room/sync_depth_00211.png 518.8579
486
+ living_room/rgb_00263.jpg living_room/sync_depth_00263.png 518.8579
487
+ living_room/rgb_00578.jpg living_room/sync_depth_00578.png 518.8579
488
+ living_room/rgb_00579.jpg living_room/sync_depth_00579.png 518.8579
489
+ living_room/rgb_00580.jpg living_room/sync_depth_00580.png 518.8579
490
+ living_room/rgb_00581.jpg living_room/sync_depth_00581.png 518.8579
491
+ living_room/rgb_00590.jpg living_room/sync_depth_00590.png 518.8579
492
+ living_room/rgb_00591.jpg living_room/sync_depth_00591.png 518.8579
493
+ living_room/rgb_00592.jpg living_room/sync_depth_00592.png 518.8579
494
+ living_room/rgb_00593.jpg living_room/sync_depth_00593.png 518.8579
495
+ living_room/rgb_00602.jpg living_room/sync_depth_00602.png 518.8579
496
+ living_room/rgb_00603.jpg living_room/sync_depth_00603.png 518.8579
497
+ living_room/rgb_00604.jpg living_room/sync_depth_00604.png 518.8579
498
+ living_room/rgb_00605.jpg living_room/sync_depth_00605.png 518.8579
499
+ living_room/rgb_00606.jpg living_room/sync_depth_00606.png 518.8579
500
+ living_room/rgb_01200.jpg living_room/sync_depth_01200.png 518.8579
501
+ living_room/rgb_01201.jpg living_room/sync_depth_01201.png 518.8579
502
+ living_room/rgb_01202.jpg living_room/sync_depth_01202.png 518.8579
503
+ living_room/rgb_01203.jpg living_room/sync_depth_01203.png 518.8579
504
+ living_room/rgb_01204.jpg living_room/sync_depth_01204.png 518.8579
505
+ living_room/rgb_01205.jpg living_room/sync_depth_01205.png 518.8579
506
+ living_room/rgb_01206.jpg living_room/sync_depth_01206.png 518.8579
507
+ living_room/rgb_01207.jpg living_room/sync_depth_01207.png 518.8579
508
+ living_room/rgb_00582.jpg living_room/sync_depth_00582.png 518.8579
509
+ living_room/rgb_01208.jpg living_room/sync_depth_01208.png 518.8579
510
+ living_room/rgb_01247.jpg living_room/sync_depth_01247.png 518.8579
511
+ living_room/rgb_01277.jpg living_room/sync_depth_01277.png 518.8579
512
+ living_room/rgb_01302.jpg living_room/sync_depth_01302.png 518.8579
513
+ living_room/rgb_01209.jpg living_room/sync_depth_01209.png 518.8579
514
+ living_room/rgb_01210.jpg living_room/sync_depth_01210.png 518.8579
515
+ living_room/rgb_01211.jpg living_room/sync_depth_01211.png 518.8579
516
+ living_room/rgb_01215.jpg living_room/sync_depth_01215.png 518.8579
517
+ living_room/rgb_01216.jpg living_room/sync_depth_01216.png 518.8579
518
+ living_room/rgb_01217.jpg living_room/sync_depth_01217.png 518.8579
519
+ living_room/rgb_01218.jpg living_room/sync_depth_01218.png 518.8579
520
+ living_room/rgb_01219.jpg living_room/sync_depth_01219.png 518.8579
521
+ living_room/rgb_01225.jpg living_room/sync_depth_01225.png 518.8579
522
+ living_room/rgb_01226.jpg living_room/sync_depth_01226.png 518.8579
523
+ living_room/rgb_01227.jpg living_room/sync_depth_01227.png 518.8579
524
+ living_room/rgb_01228.jpg living_room/sync_depth_01228.png 518.8579
525
+ living_room/rgb_01229.jpg living_room/sync_depth_01229.png 518.8579
526
+ living_room/rgb_01232.jpg living_room/sync_depth_01232.png 518.8579
527
+ living_room/rgb_01233.jpg living_room/sync_depth_01233.png 518.8579
528
+ living_room/rgb_01234.jpg living_room/sync_depth_01234.png 518.8579
529
+ living_room/rgb_01246.jpg living_room/sync_depth_01246.png 518.8579
530
+ living_room/rgb_01248.jpg living_room/sync_depth_01248.png 518.8579
531
+ living_room/rgb_01249.jpg living_room/sync_depth_01249.png 518.8579
532
+ living_room/rgb_01253.jpg living_room/sync_depth_01253.png 518.8579
533
+ living_room/rgb_01254.jpg living_room/sync_depth_01254.png 518.8579
534
+ living_room/rgb_01255.jpg living_room/sync_depth_01255.png 518.8579
535
+ living_room/rgb_01256.jpg living_room/sync_depth_01256.png 518.8579
536
+ living_room/rgb_01257.jpg living_room/sync_depth_01257.png 518.8579
537
+ living_room/rgb_01258.jpg living_room/sync_depth_01258.png 518.8579
538
+ living_room/rgb_01259.jpg living_room/sync_depth_01259.png 518.8579
539
+ living_room/rgb_01260.jpg living_room/sync_depth_01260.png 518.8579
540
+ living_room/rgb_01261.jpg living_room/sync_depth_01261.png 518.8579
541
+ living_room/rgb_01262.jpg living_room/sync_depth_01262.png 518.8579
542
+ living_room/rgb_01263.jpg living_room/sync_depth_01263.png 518.8579
543
+ living_room/rgb_01264.jpg living_room/sync_depth_01264.png 518.8579
544
+ living_room/rgb_01274.jpg living_room/sync_depth_01274.png 518.8579
545
+ living_room/rgb_01275.jpg living_room/sync_depth_01275.png 518.8579
546
+ living_room/rgb_01276.jpg living_room/sync_depth_01276.png 518.8579
547
+ living_room/rgb_01278.jpg living_room/sync_depth_01278.png 518.8579
548
+ living_room/rgb_01279.jpg living_room/sync_depth_01279.png 518.8579
549
+ living_room/rgb_01284.jpg living_room/sync_depth_01284.png 518.8579
550
+ living_room/rgb_01285.jpg living_room/sync_depth_01285.png 518.8579
551
+ living_room/rgb_01286.jpg living_room/sync_depth_01286.png 518.8579
552
+ living_room/rgb_01287.jpg living_room/sync_depth_01287.png 518.8579
553
+ living_room/rgb_01288.jpg living_room/sync_depth_01288.png 518.8579
554
+ living_room/rgb_01289.jpg living_room/sync_depth_01289.png 518.8579
555
+ living_room/rgb_01290.jpg living_room/sync_depth_01290.png 518.8579
556
+ living_room/rgb_01291.jpg living_room/sync_depth_01291.png 518.8579
557
+ living_room/rgb_01292.jpg living_room/sync_depth_01292.png 518.8579
558
+ living_room/rgb_01293.jpg living_room/sync_depth_01293.png 518.8579
559
+ living_room/rgb_01294.jpg living_room/sync_depth_01294.png 518.8579
560
+ living_room/rgb_01296.jpg living_room/sync_depth_01296.png 518.8579
561
+ living_room/rgb_01297.jpg living_room/sync_depth_01297.png 518.8579
562
+ living_room/rgb_01298.jpg living_room/sync_depth_01298.png 518.8579
563
+ living_room/rgb_01301.jpg living_room/sync_depth_01301.png 518.8579
564
+ living_room/rgb_01303.jpg living_room/sync_depth_01303.png 518.8579
565
+ living_room/rgb_01304.jpg living_room/sync_depth_01304.png 518.8579
566
+ living_room/rgb_01305.jpg living_room/sync_depth_01305.png 518.8579
567
+ living_room/rgb_01306.jpg living_room/sync_depth_01306.png 518.8579
568
+ living_room/rgb_01307.jpg living_room/sync_depth_01307.png 518.8579
569
+ living_room/rgb_01313.jpg living_room/sync_depth_01313.png 518.8579
570
+ living_room/rgb_01314.jpg living_room/sync_depth_01314.png 518.8579
571
+ living_room/rgb_01328.jpg living_room/sync_depth_01328.png 518.8579
572
+ living_room/rgb_01329.jpg living_room/sync_depth_01329.png 518.8579
573
+ living_room/rgb_01330.jpg living_room/sync_depth_01330.png 518.8579
574
+ living_room/rgb_01331.jpg living_room/sync_depth_01331.png 518.8579
575
+ living_room/rgb_01334.jpg living_room/sync_depth_01334.png 518.8579
576
+ living_room/rgb_01335.jpg living_room/sync_depth_01335.png 518.8579
577
+ living_room/rgb_01336.jpg living_room/sync_depth_01336.png 518.8579
578
+ living_room/rgb_01337.jpg living_room/sync_depth_01337.png 518.8579
579
+ living_room/rgb_01338.jpg living_room/sync_depth_01338.png 518.8579
580
+ living_room/rgb_01339.jpg living_room/sync_depth_01339.png 518.8579
581
+ office/rgb_00008.jpg office/sync_depth_00008.png 518.8579
582
+ office/rgb_00013.jpg office/sync_depth_00013.png 518.8579
583
+ office/rgb_00014.jpg office/sync_depth_00014.png 518.8579
584
+ office/rgb_00015.jpg office/sync_depth_00015.png 518.8579
585
+ office/rgb_00016.jpg office/sync_depth_00016.png 518.8579
586
+ office/rgb_00017.jpg office/sync_depth_00017.png 518.8579
587
+ office/rgb_00020.jpg office/sync_depth_00020.png 518.8579
588
+ office/rgb_00027.jpg office/sync_depth_00027.png 518.8579
589
+ office/rgb_00028.jpg office/sync_depth_00028.png 518.8579
590
+ office/rgb_00029.jpg office/sync_depth_00029.png 518.8579
591
+ office/rgb_00030.jpg office/sync_depth_00030.png 518.8579
592
+ office/rgb_00031.jpg office/sync_depth_00031.png 518.8579
593
+ office/rgb_00032.jpg office/sync_depth_00032.png 518.8579
594
+ office/rgb_00033.jpg office/sync_depth_00033.png 518.8579
595
+ office/rgb_00034.jpg office/sync_depth_00034.png 518.8579
596
+ office/rgb_00035.jpg office/sync_depth_00035.png 518.8579
597
+ office/rgb_00036.jpg office/sync_depth_00036.png 518.8579
598
+ office/rgb_00038.jpg office/sync_depth_00038.png 518.8579
599
+ office/rgb_00039.jpg office/sync_depth_00039.png 518.8579
600
+ office/rgb_00040.jpg office/sync_depth_00040.png 518.8579
601
+ office/rgb_00041.jpg office/sync_depth_00041.png 518.8579
602
+ office/rgb_00042.jpg office/sync_depth_00042.png 518.8579
603
+ office/rgb_00270.jpg office/sync_depth_00270.png 518.8579
604
+ office/rgb_00271.jpg office/sync_depth_00271.png 518.8579
605
+ office/rgb_00611.jpg office/sync_depth_00611.png 518.8579
606
+ office/rgb_00612.jpg office/sync_depth_00612.png 518.8579
607
+ office/rgb_00616.jpg office/sync_depth_00616.png 518.8579
608
+ office/rgb_00617.jpg office/sync_depth_00617.png 518.8579
609
+ office/rgb_00618.jpg office/sync_depth_00618.png 518.8579
610
+ office/rgb_00619.jpg office/sync_depth_00619.png 518.8579
611
+ office/rgb_00620.jpg office/sync_depth_00620.png 518.8579
612
+ office/rgb_00632.jpg office/sync_depth_00632.png 518.8579
613
+ office/rgb_00633.jpg office/sync_depth_00633.png 518.8579
614
+ office/rgb_00634.jpg office/sync_depth_00634.png 518.8579
615
+ office/rgb_00635.jpg office/sync_depth_00635.png 518.8579
616
+ office/rgb_00636.jpg office/sync_depth_00636.png 518.8579
617
+ office/rgb_00637.jpg office/sync_depth_00637.png 518.8579
618
+ office/rgb_00037.jpg office/sync_depth_00037.png 518.8579
619
+ office_kitchen/rgb_00410.jpg office_kitchen/sync_depth_00410.png 518.8579
620
+ office_kitchen/rgb_00411.jpg office_kitchen/sync_depth_00411.png 518.8579
621
+ office_kitchen/rgb_00412.jpg office_kitchen/sync_depth_00412.png 518.8579
622
+ office_kitchen/rgb_00413.jpg office_kitchen/sync_depth_00413.png 518.8579
623
+ playroom/rgb_00429.jpg playroom/sync_depth_00429.png 518.8579
624
+ playroom/rgb_00430.jpg playroom/sync_depth_00430.png 518.8579
625
+ playroom/rgb_00431.jpg playroom/sync_depth_00431.png 518.8579
626
+ playroom/rgb_00432.jpg playroom/sync_depth_00432.png 518.8579
627
+ playroom/rgb_00433.jpg playroom/sync_depth_00433.png 518.8579
628
+ playroom/rgb_00434.jpg playroom/sync_depth_00434.png 518.8579
629
+ playroom/rgb_00440.jpg playroom/sync_depth_00440.png 518.8579
630
+ playroom/rgb_00441.jpg playroom/sync_depth_00441.png 518.8579
631
+ playroom/rgb_00442.jpg playroom/sync_depth_00442.png 518.8579
632
+ playroom/rgb_00443.jpg playroom/sync_depth_00443.png 518.8579
633
+ playroom/rgb_00444.jpg playroom/sync_depth_00444.png 518.8579
634
+ playroom/rgb_00445.jpg playroom/sync_depth_00445.png 518.8579
635
+ playroom/rgb_00446.jpg playroom/sync_depth_00446.png 518.8579
636
+ playroom/rgb_00447.jpg playroom/sync_depth_00447.png 518.8579
637
+ reception_room/rgb_00461.jpg reception_room/sync_depth_00461.png 518.8579
638
+ reception_room/rgb_00462.jpg reception_room/sync_depth_00462.png 518.8579
639
+ reception_room/rgb_00463.jpg reception_room/sync_depth_00463.png 518.8579
640
+ reception_room/rgb_00464.jpg reception_room/sync_depth_00464.png 518.8579
641
+ reception_room/rgb_00465.jpg reception_room/sync_depth_00465.png 518.8579
642
+ study/rgb_00468.jpg study/sync_depth_00468.png 518.8579
643
+ study/rgb_00469.jpg study/sync_depth_00469.png 518.8579
644
+ study/rgb_00470.jpg study/sync_depth_00470.png 518.8579
645
+ study/rgb_00471.jpg study/sync_depth_00471.png 518.8579
646
+ study/rgb_00472.jpg study/sync_depth_00472.png 518.8579
647
+ study/rgb_00473.jpg study/sync_depth_00473.png 518.8579
648
+ study/rgb_00474.jpg study/sync_depth_00474.png 518.8579
649
+ study/rgb_00475.jpg study/sync_depth_00475.png 518.8579
650
+ study/rgb_00476.jpg study/sync_depth_00476.png 518.8579
651
+ study/rgb_00643.jpg study/sync_depth_00643.png 518.8579
652
+ study/rgb_00644.jpg study/sync_depth_00644.png 518.8579
653
+ study_room/rgb_00272.jpg study_room/sync_depth_00272.png 518.8579
654
+ study_room/rgb_00278.jpg study_room/sync_depth_00278.png 518.8579
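
Each row of the NYU Depth v2 split files above follows the same three-column format: relative RGB path, relative synchronized depth path, and the camera focal length in pixels (518.8579 for NYU). Below is a minimal, hedged sketch of how one such row is parsed, mirroring what `DataLoadPreprocess.__getitem__` in `metric_depth/zoedepth/data/data_mono.py` (added further below) does; the root directories are placeholders, not paths from this repo.

```python
# Hedged sketch: parse one row of the split file the way data_mono.py does.
import os

row = "bedroom/rgb_00282.jpg bedroom/sync_depth_00282.png 518.8579"
rgb_rel, depth_rel, focal_str = row.split()   # three whitespace-separated columns
focal = float(focal_str)                      # focal length in pixels

DATA_ROOT = "/path/to/nyu/rgb"                # placeholder for config.data_path
GT_ROOT = "/path/to/nyu/sync_depth"           # placeholder for config.gt_path

image_path = os.path.join(DATA_ROOT, rgb_rel)
depth_path = os.path.join(GT_ROOT, depth_rel)

# For the 'nyu' dataset the 16-bit depth PNG is later divided by 1000.0
# to convert it to metres (see DataLoadPreprocess.__getitem__ below).
print(image_path, depth_path, focal)
```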
metric_depth/train_test_inputs/nyudepthv2_train_files_with_gt.txt ADDED
The diff for this file is too large to render. See raw diff
 
metric_depth/zoedepth/data/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
metric_depth/zoedepth/data/data_mono.py ADDED
@@ -0,0 +1,573 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ # This file is partly inspired from BTS (https://github.com/cleinc/bts/blob/master/pytorch/bts_dataloader.py); author: Jin Han Lee
26
+
27
+ import itertools
28
+ import os
29
+ import random
30
+
31
+ import numpy as np
32
+ import cv2
33
+ import torch
34
+ import torch.nn as nn
35
+ import torch.utils.data.distributed
36
+ from zoedepth.utils.easydict import EasyDict as edict
37
+ from PIL import Image, ImageOps
38
+ from torch.utils.data import DataLoader, Dataset
39
+ from torchvision import transforms
40
+
41
+ from zoedepth.utils.config import change_dataset
42
+
43
+ from .ddad import get_ddad_loader
44
+ from .diml_indoor_test import get_diml_indoor_loader
45
+ from .diml_outdoor_test import get_diml_outdoor_loader
46
+ from .diode import get_diode_loader
47
+ from .hypersim import get_hypersim_loader
48
+ from .ibims import get_ibims_loader
49
+ from .sun_rgbd_loader import get_sunrgbd_loader
50
+ from .vkitti import get_vkitti_loader
51
+ from .vkitti2 import get_vkitti2_loader
52
+
53
+ from .preprocess import CropParams, get_white_border, get_black_border
54
+
55
+
56
+ def _is_pil_image(img):
57
+ return isinstance(img, Image.Image)
58
+
59
+
60
+ def _is_numpy_image(img):
61
+ return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
62
+
63
+
64
+ def preprocessing_transforms(mode, **kwargs):
65
+ return transforms.Compose([
66
+ ToTensor(mode=mode, **kwargs)
67
+ ])
68
+
69
+
70
+ class DepthDataLoader(object):
71
+ def __init__(self, config, mode, device='cpu', transform=None, **kwargs):
72
+ """
73
+ Data loader for depth datasets
74
+
75
+ Args:
76
+ config (dict): Config dictionary. Refer to utils/config.py
77
+ mode (str): "train" or "online_eval"
78
+ device (str, optional): Device to load the data on. Defaults to 'cpu'.
79
+ transform (torchvision.transforms, optional): Transform to apply to the data. Defaults to None.
80
+ """
81
+
82
+ self.config = config
83
+
84
+ if config.dataset == 'ibims':
85
+ self.data = get_ibims_loader(config, batch_size=1, num_workers=1)
86
+ return
87
+
88
+ if config.dataset == 'sunrgbd':
89
+ self.data = get_sunrgbd_loader(
90
+ data_dir_root=config.sunrgbd_root, batch_size=1, num_workers=1)
91
+ return
92
+
93
+ if config.dataset == 'diml_indoor':
94
+ self.data = get_diml_indoor_loader(
95
+ data_dir_root=config.diml_indoor_root, batch_size=1, num_workers=1)
96
+ return
97
+
98
+ if config.dataset == 'diml_outdoor':
99
+ self.data = get_diml_outdoor_loader(
100
+ data_dir_root=config.diml_outdoor_root, batch_size=1, num_workers=1)
101
+ return
102
+
103
+ if "diode" in config.dataset:
104
+ self.data = get_diode_loader(
105
+ config[config.dataset+"_root"], batch_size=1, num_workers=1)
106
+ return
107
+
108
+ if config.dataset == 'hypersim_test':
109
+ self.data = get_hypersim_loader(
110
+ config.hypersim_test_root, batch_size=1, num_workers=1)
111
+ return
112
+
113
+ if config.dataset == 'vkitti':
114
+ self.data = get_vkitti_loader(
115
+ config.vkitti_root, batch_size=1, num_workers=1)
116
+ return
117
+
118
+ if config.dataset == 'vkitti2':
119
+ self.data = get_vkitti2_loader(
120
+ config.vkitti2_root, batch_size=1, num_workers=1)
121
+ return
122
+
123
+ if config.dataset == 'ddad':
124
+ self.data = get_ddad_loader(config.ddad_root, resize_shape=(
125
+ 352, 1216), batch_size=1, num_workers=1)
126
+ return
127
+
128
+ img_size = self.config.get("img_size", None)
129
+ img_size = img_size if self.config.get(
130
+ "do_input_resize", False) else None
131
+
132
+ if transform is None:
133
+ transform = preprocessing_transforms(mode, size=img_size)
134
+
135
+ if mode == 'train':
136
+
137
+ Dataset = DataLoadPreprocess
138
+ self.training_samples = Dataset(
139
+ config, mode, transform=transform, device=device)
140
+
141
+ if config.distributed:
142
+ self.train_sampler = torch.utils.data.distributed.DistributedSampler(
143
+ self.training_samples)
144
+ else:
145
+ self.train_sampler = None
146
+
147
+ self.data = DataLoader(self.training_samples,
148
+ batch_size=config.batch_size,
149
+ shuffle=(self.train_sampler is None),
150
+ num_workers=config.workers,
151
+ pin_memory=True,
152
+ persistent_workers=True,
153
+ # prefetch_factor=2,
154
+ sampler=self.train_sampler)
155
+
156
+ elif mode == 'online_eval':
157
+ self.testing_samples = DataLoadPreprocess(
158
+ config, mode, transform=transform)
159
+ if config.distributed: # redundant. here only for readability and to be more explicit
160
+ # Give whole test set to all processes (and report evaluation only on one) regardless
161
+ self.eval_sampler = None
162
+ else:
163
+ self.eval_sampler = None
164
+ self.data = DataLoader(self.testing_samples, 1,
165
+ shuffle=kwargs.get("shuffle_test", False),
166
+ num_workers=1,
167
+ pin_memory=False,
168
+ sampler=self.eval_sampler)
169
+
170
+ elif mode == 'test':
171
+ self.testing_samples = DataLoadPreprocess(
172
+ config, mode, transform=transform)
173
+ self.data = DataLoader(self.testing_samples,
174
+ 1, shuffle=False, num_workers=1)
175
+
176
+ else:
177
+ print(
178
+ 'mode should be one of \'train, test, online_eval\'. Got {}'.format(mode))
179
+
180
+
181
+ def repetitive_roundrobin(*iterables):
182
+ """
183
+ Cycles through the given iterables, but sample-wise:
184
+ first yield the first sample from each iterable in turn,
185
+ then the second sample from each iterable, and so on.
186
+
187
+ If one iterable is shorter than the others, it is repeated until all iterables are exhausted
188
+ repetitive_roundrobin('ABC', 'D', 'EF') --> A D E B D F C D E A D F
189
+ """
190
+ # Repetitive roundrobin
191
+ iterables_ = [iter(it) for it in iterables]
192
+ exhausted = [False] * len(iterables)
193
+ while not all(exhausted):
194
+ for i, it in enumerate(iterables_):
195
+ try:
196
+ yield next(it)
197
+ except StopIteration:
198
+ exhausted[i] = True
199
+ iterables_[i] = itertools.cycle(iterables[i])
200
+ # First elements may get repeated if one iterable is shorter than the others
201
+ yield next(iterables_[i])
202
+
203
+
204
+ class RepetitiveRoundRobinDataLoader(object):
205
+ def __init__(self, *dataloaders):
206
+ self.dataloaders = dataloaders
207
+
208
+ def __iter__(self):
209
+ return repetitive_roundrobin(*self.dataloaders)
210
+
211
+ def __len__(self):
212
+ # First samples get repeated, that's why the plus one
213
+ return len(self.dataloaders) * (max(len(dl) for dl in self.dataloaders) + 1)
214
+
215
+
216
+ class MixedNYUKITTI(object):
217
+ def __init__(self, config, mode, device='cpu', **kwargs):
218
+ config = edict(config)
219
+ config.workers = config.workers // 2
220
+ self.config = config
221
+ nyu_conf = change_dataset(edict(config), 'nyu')
222
+ kitti_conf = change_dataset(edict(config), 'kitti')
223
+
224
+ # make nyu default for testing
225
+ self.config = config = nyu_conf
226
+ img_size = self.config.get("img_size", None)
227
+ img_size = img_size if self.config.get(
228
+ "do_input_resize", False) else None
229
+ if mode == 'train':
230
+ nyu_loader = DepthDataLoader(
231
+ nyu_conf, mode, device=device, transform=preprocessing_transforms(mode, size=img_size)).data
232
+ kitti_loader = DepthDataLoader(
233
+ kitti_conf, mode, device=device, transform=preprocessing_transforms(mode, size=img_size)).data
234
+ # It has been changed to repetitive roundrobin
235
+ self.data = RepetitiveRoundRobinDataLoader(
236
+ nyu_loader, kitti_loader)
237
+ else:
238
+ self.data = DepthDataLoader(nyu_conf, mode, device=device).data
239
+
240
+
241
+ def remove_leading_slash(s):
242
+ if s[0] == '/' or s[0] == '\\':
243
+ return s[1:]
244
+ return s
245
+
246
+
247
+ class CachedReader:
248
+ def __init__(self, shared_dict=None):
249
+ if shared_dict:
250
+ self._cache = shared_dict
251
+ else:
252
+ self._cache = {}
253
+
254
+ def open(self, fpath):
255
+ im = self._cache.get(fpath, None)
256
+ if im is None:
257
+ im = self._cache[fpath] = Image.open(fpath)
258
+ return im
259
+
260
+
261
+ class ImReader:
262
+ def __init__(self):
263
+ pass
264
+
265
+ # @cache
266
+ def open(self, fpath):
267
+ return Image.open(fpath)
268
+
269
+
270
+ class DataLoadPreprocess(Dataset):
271
+ def __init__(self, config, mode, transform=None, is_for_online_eval=False, **kwargs):
272
+ self.config = config
273
+ if mode == 'online_eval':
274
+ with open(config.filenames_file_eval, 'r') as f:
275
+ self.filenames = f.readlines()
276
+ else:
277
+ with open(config.filenames_file, 'r') as f:
278
+ self.filenames = f.readlines()
279
+
280
+ self.mode = mode
281
+ self.transform = transform
282
+ self.to_tensor = ToTensor(mode)
283
+ self.is_for_online_eval = is_for_online_eval
284
+ if config.use_shared_dict:
285
+ self.reader = CachedReader(config.shared_dict)
286
+ else:
287
+ self.reader = ImReader()
288
+
289
+ def postprocess(self, sample):
290
+ return sample
291
+
292
+ def __getitem__(self, idx):
293
+ sample_path = self.filenames[idx]
294
+ focal = float(sample_path.split()[2])
295
+ sample = {}
296
+
297
+ if self.mode == 'train':
298
+ if self.config.dataset == 'kitti' and self.config.use_right and random.random() > 0.5:
299
+ image_path = os.path.join(
300
+ self.config.data_path, remove_leading_slash(sample_path.split()[3]))
301
+ depth_path = os.path.join(
302
+ self.config.gt_path, remove_leading_slash(sample_path.split()[4]))
303
+ else:
304
+ image_path = os.path.join(
305
+ self.config.data_path, remove_leading_slash(sample_path.split()[0]))
306
+ depth_path = os.path.join(
307
+ self.config.gt_path, remove_leading_slash(sample_path.split()[1]))
308
+
309
+ image = self.reader.open(image_path)
310
+ depth_gt = self.reader.open(depth_path)
311
+ w, h = image.size
312
+
313
+ if self.config.do_kb_crop:
314
+ height = image.height
315
+ width = image.width
316
+ top_margin = int(height - 352)
317
+ left_margin = int((width - 1216) / 2)
318
+ depth_gt = depth_gt.crop(
319
+ (left_margin, top_margin, left_margin + 1216, top_margin + 352))
320
+ image = image.crop(
321
+ (left_margin, top_margin, left_margin + 1216, top_margin + 352))
322
+
323
+ # Avoid blank boundaries due to pixel registration?
324
+ # Train images have white border. Test images have black border.
325
+ if self.config.dataset == 'nyu' and self.config.avoid_boundary:
326
+ # print("Avoiding Blank Boundaries!")
327
+ # We just crop and pad again with reflect padding to original size
328
+ # original_size = image.size
329
+ crop_params = get_white_border(np.array(image, dtype=np.uint8))
330
+ image = image.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom))
331
+ depth_gt = depth_gt.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom))
332
+
333
+ # Use reflect padding to fill the blank
334
+ image = np.array(image)
335
+ image = np.pad(image, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right), (0, 0)), mode='reflect')
336
+ image = Image.fromarray(image)
337
+
338
+ depth_gt = np.array(depth_gt)
339
+ depth_gt = np.pad(depth_gt, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right)), 'constant', constant_values=0)
340
+ depth_gt = Image.fromarray(depth_gt)
341
+
342
+
343
+ if self.config.do_random_rotate and (self.config.aug):
344
+ random_angle = (random.random() - 0.5) * 2 * self.config.degree
345
+ image = self.rotate_image(image, random_angle)
346
+ depth_gt = self.rotate_image(
347
+ depth_gt, random_angle, flag=Image.NEAREST)
348
+
349
+ image = np.asarray(image, dtype=np.float32) / 255.0
350
+ depth_gt = np.asarray(depth_gt, dtype=np.float32)
351
+ depth_gt = np.expand_dims(depth_gt, axis=2)
352
+
353
+ if self.config.dataset == 'nyu':
354
+ depth_gt = depth_gt / 1000.0
355
+ else:
356
+ depth_gt = depth_gt / 256.0
357
+
358
+ if self.config.aug and (self.config.random_crop):
359
+ image, depth_gt = self.random_crop(
360
+ image, depth_gt, self.config.input_height, self.config.input_width)
361
+
362
+ if self.config.aug and self.config.random_translate:
363
+ # print("Random Translation!")
364
+ image, depth_gt = self.random_translate(image, depth_gt, self.config.max_translation)
365
+
366
+ image, depth_gt = self.train_preprocess(image, depth_gt)
367
+ mask = np.logical_and(depth_gt > self.config.min_depth,
368
+ depth_gt < self.config.max_depth).squeeze()[None, ...]
369
+ sample = {'image': image, 'depth': depth_gt, 'focal': focal,
370
+ 'mask': mask, **sample}
371
+
372
+ else:
373
+ if self.mode == 'online_eval':
374
+ data_path = self.config.data_path_eval
375
+ else:
376
+ data_path = self.config.data_path
377
+
378
+ image_path = os.path.join(
379
+ data_path, remove_leading_slash(sample_path.split()[0]))
380
+ image = np.asarray(self.reader.open(image_path),
381
+ dtype=np.float32) / 255.0
382
+
383
+ if self.mode == 'online_eval':
384
+ gt_path = self.config.gt_path_eval
385
+ depth_path = os.path.join(
386
+ gt_path, remove_leading_slash(sample_path.split()[1]))
387
+ has_valid_depth = False
388
+ try:
389
+ depth_gt = self.reader.open(depth_path)
390
+ has_valid_depth = True
391
+ except IOError:
392
+ depth_gt = False
393
+ # print('Missing gt for {}'.format(image_path))
394
+
395
+ if has_valid_depth:
396
+ depth_gt = np.asarray(depth_gt, dtype=np.float32)
397
+ depth_gt = np.expand_dims(depth_gt, axis=2)
398
+ if self.config.dataset == 'nyu':
399
+ depth_gt = depth_gt / 1000.0
400
+ else:
401
+ depth_gt = depth_gt / 256.0
402
+
403
+ mask = np.logical_and(
404
+ depth_gt >= self.config.min_depth, depth_gt <= self.config.max_depth).squeeze()[None, ...]
405
+ else:
406
+ mask = False
407
+
408
+ if self.config.do_kb_crop:
409
+ height = image.shape[0]
410
+ width = image.shape[1]
411
+ top_margin = int(height - 352)
412
+ left_margin = int((width - 1216) / 2)
413
+ image = image[top_margin:top_margin + 352,
414
+ left_margin:left_margin + 1216, :]
415
+ if self.mode == 'online_eval' and has_valid_depth:
416
+ depth_gt = depth_gt[top_margin:top_margin +
417
+ 352, left_margin:left_margin + 1216, :]
418
+
419
+ if self.mode == 'online_eval':
420
+ sample = {'image': image, 'depth': depth_gt, 'focal': focal, 'has_valid_depth': has_valid_depth,
421
+ 'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1],
422
+ 'mask': mask}
423
+ else:
424
+ sample = {'image': image, 'focal': focal}
425
+
426
+ if (self.mode == 'train') or ('has_valid_depth' in sample and sample['has_valid_depth']):
427
+ mask = np.logical_and(depth_gt > self.config.min_depth,
428
+ depth_gt < self.config.max_depth).squeeze()[None, ...]
429
+ sample['mask'] = mask
430
+
431
+ if self.transform:
432
+ sample = self.transform(sample)
433
+
434
+ sample = self.postprocess(sample)
435
+ sample['dataset'] = self.config.dataset
436
+ sample = {**sample, 'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1]}
437
+
438
+ return sample
439
+
440
+ def rotate_image(self, image, angle, flag=Image.BILINEAR):
441
+ result = image.rotate(angle, resample=flag)
442
+ return result
443
+
444
+ def random_crop(self, img, depth, height, width):
445
+ assert img.shape[0] >= height
446
+ assert img.shape[1] >= width
447
+ assert img.shape[0] == depth.shape[0]
448
+ assert img.shape[1] == depth.shape[1]
449
+ x = random.randint(0, img.shape[1] - width)
450
+ y = random.randint(0, img.shape[0] - height)
451
+ img = img[y:y + height, x:x + width, :]
452
+ depth = depth[y:y + height, x:x + width, :]
453
+
454
+ return img, depth
455
+
456
+ def random_translate(self, img, depth, max_t=20):
457
+ assert img.shape[0] == depth.shape[0]
458
+ assert img.shape[1] == depth.shape[1]
459
+ p = self.config.translate_prob
460
+ do_translate = random.random()
461
+ if do_translate > p:
462
+ return img, depth
463
+ x = random.randint(-max_t, max_t)
464
+ y = random.randint(-max_t, max_t)
465
+ M = np.float32([[1, 0, x], [0, 1, y]])
466
+ # print(img.shape, depth.shape)
467
+ img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
468
+ depth = cv2.warpAffine(depth, M, (depth.shape[1], depth.shape[0]))
469
+ depth = depth.squeeze()[..., None] # add channel dim back. Affine warp removes it
470
+ # print("after", img.shape, depth.shape)
471
+ return img, depth
472
+
473
+ def train_preprocess(self, image, depth_gt):
474
+ if self.config.aug:
475
+ # Random flipping
476
+ do_flip = random.random()
477
+ if do_flip > 0.5:
478
+ image = (image[:, ::-1, :]).copy()
479
+ depth_gt = (depth_gt[:, ::-1, :]).copy()
480
+
481
+ # Random gamma, brightness, color augmentation
482
+ do_augment = random.random()
483
+ if do_augment > 0.5:
484
+ image = self.augment_image(image)
485
+
486
+ return image, depth_gt
487
+
488
+ def augment_image(self, image):
489
+ # gamma augmentation
490
+ gamma = random.uniform(0.9, 1.1)
491
+ image_aug = image ** gamma
492
+
493
+ # brightness augmentation
494
+ if self.config.dataset == 'nyu':
495
+ brightness = random.uniform(0.75, 1.25)
496
+ else:
497
+ brightness = random.uniform(0.9, 1.1)
498
+ image_aug = image_aug * brightness
499
+
500
+ # color augmentation
501
+ colors = np.random.uniform(0.9, 1.1, size=3)
502
+ white = np.ones((image.shape[0], image.shape[1]))
503
+ color_image = np.stack([white * colors[i] for i in range(3)], axis=2)
504
+ image_aug *= color_image
505
+ image_aug = np.clip(image_aug, 0, 1)
506
+
507
+ return image_aug
508
+
509
+ def __len__(self):
510
+ return len(self.filenames)
511
+
512
+
513
+ class ToTensor(object):
514
+ def __init__(self, mode, do_normalize=False, size=None):
515
+ self.mode = mode
516
+ self.normalize = transforms.Normalize(
517
+ mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if do_normalize else nn.Identity()
518
+ self.size = size
519
+ if size is not None:
520
+ self.resize = transforms.Resize(size=size)
521
+ else:
522
+ self.resize = nn.Identity()
523
+
524
+ def __call__(self, sample):
525
+ image, focal = sample['image'], sample['focal']
526
+ image = self.to_tensor(image)
527
+ image = self.normalize(image)
528
+ image = self.resize(image)
529
+
530
+ if self.mode == 'test':
531
+ return {'image': image, 'focal': focal}
532
+
533
+ depth = sample['depth']
534
+ if self.mode == 'train':
535
+ depth = self.to_tensor(depth)
536
+ return {**sample, 'image': image, 'depth': depth, 'focal': focal}
537
+ else:
538
+ has_valid_depth = sample['has_valid_depth']
539
+ image = self.resize(image)
540
+ return {**sample, 'image': image, 'depth': depth, 'focal': focal, 'has_valid_depth': has_valid_depth,
541
+ 'image_path': sample['image_path'], 'depth_path': sample['depth_path']}
542
+
543
+ def to_tensor(self, pic):
544
+ if not (_is_pil_image(pic) or _is_numpy_image(pic)):
545
+ raise TypeError(
546
+ 'pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
547
+
548
+ if isinstance(pic, np.ndarray):
549
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
550
+ return img
551
+
552
+ # handle PIL Image
553
+ if pic.mode == 'I':
554
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
555
+ elif pic.mode == 'I;16':
556
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
557
+ else:
558
+ img = torch.ByteTensor(
559
+ torch.ByteStorage.from_buffer(pic.tobytes()))
560
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
561
+ if pic.mode == 'YCbCr':
562
+ nchannel = 3
563
+ elif pic.mode == 'I;16':
564
+ nchannel = 1
565
+ else:
566
+ nchannel = len(pic.mode)
567
+ img = img.view(pic.size[1], pic.size[0], nchannel)
568
+
569
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
570
+ if isinstance(img, torch.ByteTensor):
571
+ return img.float()
572
+ else:
573
+ return img
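
A minimal usage sketch for the ToTensor transform defined above, assuming the module is importable as zoedepth.data.data_mono when running from metric_depth/; the array shapes, focal value and the (392, 518) resize target below are illustrative assumptions, not values taken from this commit:

    # Illustrative sketch only: feeds random arrays through the ToTensor defined above.
    import numpy as np
    from zoedepth.data.data_mono import ToTensor  # assumed import path

    sample = {
        'image': np.random.rand(480, 640, 3).astype(np.float32),  # HWC RGB in [0, 1]
        'depth': np.random.rand(480, 640, 1).astype(np.float32),  # metres, HW1
        'focal': 518.86,
    }
    out = ToTensor(mode='train', do_normalize=True, size=(392, 518))(sample)
    print(out['image'].shape)  # torch.Size([3, 392, 518]) -- normalized and resized
    print(out['depth'].shape)  # torch.Size([1, 480, 640]) -- depth keeps its native resolution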
metric_depth/zoedepth/data/ddad.py ADDED
@@ -0,0 +1,125 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
+ class ToTensor(object):
35
+ def __init__(self, resize_shape):
36
+ # self.normalize = transforms.Normalize(
37
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38
+ self.normalize = lambda x : x
39
+ self.resize = transforms.Resize(resize_shape)
40
+
41
+ def __call__(self, sample):
42
+ image, depth = sample['image'], sample['depth']
43
+ image = self.to_tensor(image)
44
+ image = self.normalize(image)
45
+ depth = self.to_tensor(depth)
46
+
47
+ image = self.resize(image)
48
+
49
+ return {'image': image, 'depth': depth, 'dataset': "ddad"}
50
+
51
+ def to_tensor(self, pic):
52
+
53
+ if isinstance(pic, np.ndarray):
54
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
55
+ return img
56
+
57
+ # # handle PIL Image
58
+ if pic.mode == 'I':
59
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
60
+ elif pic.mode == 'I;16':
61
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
62
+ else:
63
+ img = torch.ByteTensor(
64
+ torch.ByteStorage.from_buffer(pic.tobytes()))
65
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
66
+ if pic.mode == 'YCbCr':
67
+ nchannel = 3
68
+ elif pic.mode == 'I;16':
69
+ nchannel = 1
70
+ else:
71
+ nchannel = len(pic.mode)
72
+ img = img.view(pic.size[1], pic.size[0], nchannel)
73
+
74
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
75
+
76
+ if isinstance(img, torch.ByteTensor):
77
+ return img.float()
78
+ else:
79
+ return img
80
+
81
+
82
+ class DDAD(Dataset):
83
+ def __init__(self, data_dir_root, resize_shape):
84
+ import glob
85
+
86
+ # image paths are of the form <data_dir_root>/{outleft, depthmap}/*.png
87
+
88
+ # self.image_files = glob.glob(os.path.join(data_dir_root, '*.png'))
89
+ # self.depth_files = [r.replace("_rgb.png", "_depth.npy")
90
+ # for r in self.image_files]
91
+ self.image_files, self.depth_files = [], []
92
+ with open('/mnt/bn/liheyang/MTL-SA-1B/dataset/splits/ddad/val.txt', 'r') as f:
93
+ lines = f.read().splitlines()
94
+ for line in lines:
95
+ self.image_files.append(line.split(' ')[0])
96
+ self.depth_files.append(line.split(' ')[1])
97
+
98
+ self.transform = ToTensor(resize_shape)
99
+
100
+ def __getitem__(self, idx):
101
+
102
+ image_path = self.image_files[idx]
103
+ depth_path = self.depth_files[idx]
104
+
105
+ image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
106
+ depth = np.load(depth_path) # meters
107
+
108
+ # depth[depth > 8] = -1
109
+ depth = depth[..., None]
110
+
111
+ sample = dict(image=image, depth=depth)
112
+ sample = self.transform(sample)
113
+
114
+ if idx == 0:
115
+ print(sample["image"].shape)
116
+
117
+ return sample
118
+
119
+ def __len__(self):
120
+ return len(self.image_files)
121
+
122
+
123
+ def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs):
124
+ dataset = DDAD(data_dir_root, resize_shape)
125
+ return DataLoader(dataset, batch_size, **kwargs)
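
A hedged usage sketch for the loader above. DDAD.__init__ reads its image/depth paths from a hard-coded val.txt split rather than from data_dir_root, so this only runs where that split and the listed files exist; the resize shape below is an illustrative assumption:

    # Illustrative sketch only: iterate a few DDAD evaluation batches.
    from zoedepth.data.ddad import get_ddad_loader  # assumed import path

    loader = get_ddad_loader(data_dir_root="datasets/ddad", resize_shape=(352, 1216),
                             batch_size=1, num_workers=2)
    for i, batch in enumerate(loader):
        # image is resized to resize_shape; depth stays at its native resolution
        print(batch["image"].shape, batch["depth"].shape)
        if i == 2:
            break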
metric_depth/zoedepth/data/diml_indoor_test.py ADDED
@@ -0,0 +1,125 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
+ class ToTensor(object):
35
+ def __init__(self):
36
+ # self.normalize = transforms.Normalize(
37
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38
+ self.normalize = lambda x : x
39
+ self.resize = transforms.Resize((480, 640))
40
+
41
+ def __call__(self, sample):
42
+ image, depth = sample['image'], sample['depth']
43
+ image = self.to_tensor(image)
44
+ image = self.normalize(image)
45
+ depth = self.to_tensor(depth)
46
+
47
+ image = self.resize(image)
48
+
49
+ return {'image': image, 'depth': depth, 'dataset': "diml_indoor"}
50
+
51
+ def to_tensor(self, pic):
52
+
53
+ if isinstance(pic, np.ndarray):
54
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
55
+ return img
56
+
57
+ # # handle PIL Image
58
+ if pic.mode == 'I':
59
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
60
+ elif pic.mode == 'I;16':
61
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
62
+ else:
63
+ img = torch.ByteTensor(
64
+ torch.ByteStorage.from_buffer(pic.tobytes()))
65
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
66
+ if pic.mode == 'YCbCr':
67
+ nchannel = 3
68
+ elif pic.mode == 'I;16':
69
+ nchannel = 1
70
+ else:
71
+ nchannel = len(pic.mode)
72
+ img = img.view(pic.size[1], pic.size[0], nchannel)
73
+
74
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
75
+ if isinstance(img, torch.ByteTensor):
76
+ return img.float()
77
+ else:
78
+ return img
79
+
80
+
81
+ class DIML_Indoor(Dataset):
82
+ def __init__(self, data_dir_root):
83
+ import glob
84
+
85
+ # image paths are of the form <data_dir_root>/{HR, LR}/<scene>/{color, depth_filled}/*.png
86
+ self.image_files = glob.glob(os.path.join(
87
+ data_dir_root, "LR", '*', 'color', '*.png'))
88
+ self.depth_files = [r.replace("color", "depth_filled").replace(
89
+ "_c.png", "_depth_filled.png") for r in self.image_files]
90
+ self.transform = ToTensor()
91
+
92
+ def __getitem__(self, idx):
93
+ image_path = self.image_files[idx]
94
+ depth_path = self.depth_files[idx]
95
+
96
+ image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
97
+ depth = np.asarray(Image.open(depth_path),
98
+ dtype='uint16') / 1000.0 # mm to meters
99
+
100
+ # print(np.shape(image))
101
+ # print(np.shape(depth))
102
+
103
+ # depth[depth > 8] = -1
104
+ depth = depth[..., None]
105
+
106
+ sample = dict(image=image, depth=depth)
107
+
108
+ # return sample
109
+ sample = self.transform(sample)
110
+
111
+ if idx == 0:
112
+ print(sample["image"].shape)
113
+
114
+ return sample
115
+
116
+ def __len__(self):
117
+ return len(self.image_files)
118
+
119
+
120
+ def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs):
121
+ dataset = DIML_Indoor(data_dir_root)
122
+ return DataLoader(dataset, batch_size, **kwargs)
123
+
124
+ # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR")
125
+ # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR")
metric_depth/zoedepth/data/diml_outdoor_test.py ADDED
@@ -0,0 +1,114 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
+ class ToTensor(object):
35
+ def __init__(self):
36
+ # self.normalize = transforms.Normalize(
37
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38
+ self.normalize = lambda x : x
39
+
40
+ def __call__(self, sample):
41
+ image, depth = sample['image'], sample['depth']
42
+ image = self.to_tensor(image)
43
+ image = self.normalize(image)
44
+ depth = self.to_tensor(depth)
45
+
46
+ return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"}
47
+
48
+ def to_tensor(self, pic):
49
+
50
+ if isinstance(pic, np.ndarray):
51
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
52
+ return img
53
+
54
+ # # handle PIL Image
55
+ if pic.mode == 'I':
56
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
57
+ elif pic.mode == 'I;16':
58
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
59
+ else:
60
+ img = torch.ByteTensor(
61
+ torch.ByteStorage.from_buffer(pic.tobytes()))
62
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
63
+ if pic.mode == 'YCbCr':
64
+ nchannel = 3
65
+ elif pic.mode == 'I;16':
66
+ nchannel = 1
67
+ else:
68
+ nchannel = len(pic.mode)
69
+ img = img.view(pic.size[1], pic.size[0], nchannel)
70
+
71
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
72
+ if isinstance(img, torch.ByteTensor):
73
+ return img.float()
74
+ else:
75
+ return img
76
+
77
+
78
+ class DIML_Outdoor(Dataset):
79
+ def __init__(self, data_dir_root):
80
+ import glob
81
+
82
+ # image paths are of the form <data_dir_root>/{outleft, depthmap}/*.png
83
+ self.image_files = glob.glob(os.path.join(
84
+ data_dir_root, 'outleft', '*.png'))
85
+ self.depth_files = [r.replace("outleft", "depthmap")
86
+ for r in self.image_files]
87
+ self.transform = ToTensor()
88
+
89
+ def __getitem__(self, idx):
90
+ image_path = self.image_files[idx]
91
+ depth_path = self.depth_files[idx]
92
+
93
+ image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
94
+ depth = np.asarray(Image.open(depth_path),
95
+ dtype='uint16') / 1000.0 # mm to meters
96
+
97
+ # depth[depth > 8] = -1
98
+ depth = depth[..., None]
99
+
100
+ sample = dict(image=image, depth=depth, dataset="diml_outdoor")
101
+
102
+ # return sample
103
+ return self.transform(sample)
104
+
105
+ def __len__(self):
106
+ return len(self.image_files)
107
+
108
+
109
+ def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs):
110
+ dataset = DIML_Outdoor(data_dir_root)
111
+ return DataLoader(dataset, batch_size, **kwargs)
112
+
113
+ # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR")
114
+ # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR")
metric_depth/zoedepth/data/diode.py ADDED
@@ -0,0 +1,125 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
+ class ToTensor(object):
35
+ def __init__(self):
36
+ # self.normalize = transforms.Normalize(
37
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38
+ self.normalize = lambda x : x
39
+ self.resize = transforms.Resize(480)
40
+
41
+ def __call__(self, sample):
42
+ image, depth = sample['image'], sample['depth']
43
+ image = self.to_tensor(image)
44
+ image = self.normalize(image)
45
+ depth = self.to_tensor(depth)
46
+
47
+ image = self.resize(image)
48
+
49
+ return {'image': image, 'depth': depth, 'dataset': "diode"}
50
+
51
+ def to_tensor(self, pic):
52
+
53
+ if isinstance(pic, np.ndarray):
54
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
55
+ return img
56
+
57
+ # # handle PIL Image
58
+ if pic.mode == 'I':
59
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
60
+ elif pic.mode == 'I;16':
61
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
62
+ else:
63
+ img = torch.ByteTensor(
64
+ torch.ByteStorage.from_buffer(pic.tobytes()))
65
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
66
+ if pic.mode == 'YCbCr':
67
+ nchannel = 3
68
+ elif pic.mode == 'I;16':
69
+ nchannel = 1
70
+ else:
71
+ nchannel = len(pic.mode)
72
+ img = img.view(pic.size[1], pic.size[0], nchannel)
73
+
74
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
75
+
76
+ if isinstance(img, torch.ByteTensor):
77
+ return img.float()
78
+ else:
79
+ return img
80
+
81
+
82
+ class DIODE(Dataset):
83
+ def __init__(self, data_dir_root):
84
+ import glob
85
+
86
+ # image paths are of the form <data_dir_root>/scene_#/scan_#/*.png
87
+ self.image_files = glob.glob(
88
+ os.path.join(data_dir_root, '*', '*', '*.png'))
89
+ self.depth_files = [r.replace(".png", "_depth.npy")
90
+ for r in self.image_files]
91
+ self.depth_mask_files = [
92
+ r.replace(".png", "_depth_mask.npy") for r in self.image_files]
93
+ self.transform = ToTensor()
94
+
95
+ def __getitem__(self, idx):
96
+ image_path = self.image_files[idx]
97
+ depth_path = self.depth_files[idx]
98
+ depth_mask_path = self.depth_mask_files[idx]
99
+
100
+ image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
101
+ depth = np.load(depth_path) # in meters
102
+ valid = np.load(depth_mask_path) # binary
103
+
104
+ # depth[depth > 8] = -1
105
+ # depth = depth[..., None]
106
+
107
+ sample = dict(image=image, depth=depth, valid=valid)
108
+
109
+ # return sample
110
+ sample = self.transform(sample)
111
+
112
+ if idx == 0:
113
+ print(sample["image"].shape)
114
+
115
+ return sample
116
+
117
+ def __len__(self):
118
+ return len(self.image_files)
119
+
120
+
121
+ def get_diode_loader(data_dir_root, batch_size=1, **kwargs):
122
+ dataset = DIODE(data_dir_root)
123
+ return DataLoader(dataset, batch_size, **kwargs)
124
+
125
+ # get_diode_loader(data_dir_root="datasets/diode/val/outdoor")
metric_depth/zoedepth/data/hypersim.py ADDED
@@ -0,0 +1,138 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import glob
26
+ import os
27
+
28
+ import h5py
29
+ import numpy as np
30
+ import torch
31
+ from PIL import Image
32
+ from torch.utils.data import DataLoader, Dataset
33
+ from torchvision import transforms
34
+
35
+
36
+ def hypersim_distance_to_depth(npyDistance):
37
+ intWidth, intHeight, fltFocal = 1024, 768, 886.81
38
+
39
+ npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape(
40
+ 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None]
41
+ npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5,
42
+ intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None]
43
+ npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32)
44
+ npyImageplane = np.concatenate(
45
+ [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2)
46
+
47
+ npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal
48
+ return npyDepth
49
+
50
+
51
+ class ToTensor(object):
52
+ def __init__(self):
53
+ # self.normalize = transforms.Normalize(
54
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
55
+ self.normalize = lambda x: x
56
+ self.resize = transforms.Resize((480, 640))
57
+
58
+ def __call__(self, sample):
59
+ image, depth = sample['image'], sample['depth']
60
+ image = self.to_tensor(image)
61
+ image = self.normalize(image)
62
+ depth = self.to_tensor(depth)
63
+
64
+ image = self.resize(image)
65
+
66
+ return {'image': image, 'depth': depth, 'dataset': "hypersim"}
67
+
68
+ def to_tensor(self, pic):
69
+
70
+ if isinstance(pic, np.ndarray):
71
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
72
+ return img
73
+
74
+ # # handle PIL Image
75
+ if pic.mode == 'I':
76
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
77
+ elif pic.mode == 'I;16':
78
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
79
+ else:
80
+ img = torch.ByteTensor(
81
+ torch.ByteStorage.from_buffer(pic.tobytes()))
82
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
83
+ if pic.mode == 'YCbCr':
84
+ nchannel = 3
85
+ elif pic.mode == 'I;16':
86
+ nchannel = 1
87
+ else:
88
+ nchannel = len(pic.mode)
89
+ img = img.view(pic.size[1], pic.size[0], nchannel)
90
+
91
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
92
+ if isinstance(img, torch.ByteTensor):
93
+ return img.float()
94
+ else:
95
+ return img
96
+
97
+
98
+ class HyperSim(Dataset):
99
+ def __init__(self, data_dir_root):
100
+ # image paths are of the form <data_dir_root>/<scene>/images/scene_cam_#_final_preview/*.tonemap.jpg
101
+ # depth paths are of the form <data_dir_root>/<scene>/images/scene_cam_#_final_preview/*.depth_meters.hdf5
102
+ self.image_files = glob.glob(os.path.join(
103
+ data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg'))
104
+ self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace(
105
+ ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files]
106
+ self.transform = ToTensor()
107
+
108
+ def __getitem__(self, idx):
109
+ image_path = self.image_files[idx]
110
+ depth_path = self.depth_files[idx]
111
+
112
+ image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
113
+
114
+ # depth from hdf5
115
+ depth_fd = h5py.File(depth_path, "r")
116
+ # in meters (Euclidean distance)
117
+ distance_meters = np.array(depth_fd['dataset'])
118
+ depth = hypersim_distance_to_depth(
119
+ distance_meters) # in meters (planar depth)
120
+
121
+ # depth[depth > 8] = -1
122
+ depth = depth[..., None]
123
+
124
+ sample = dict(image=image, depth=depth)
125
+ sample = self.transform(sample)
126
+
127
+ if idx == 0:
128
+ print(sample["image"].shape)
129
+
130
+ return sample
131
+
132
+ def __len__(self):
133
+ return len(self.image_files)
134
+
135
+
136
+ def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs):
137
+ dataset = HyperSim(data_dir_root)
138
+ return DataLoader(dataset, batch_size, **kwargs)
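
A self-contained check of the distance-to-planar-depth conversion in hypersim_distance_to_depth above: planar depth is the ray distance scaled by focal / ||(x, y, focal)||, so the two agree at the principal point and planar depth shrinks towards the corners.

    # Standalone numpy check mirroring the formula used above (same W, H, focal).
    import numpy as np

    W, H, F = 1024, 768, 886.81
    dist = np.full((H, W), 5.0, dtype=np.float32)   # 5 m Euclidean distance everywhere
    x = np.linspace(-0.5 * W + 0.5, 0.5 * W - 0.5, W).reshape(1, W).repeat(H, 0)
    y = np.linspace(-0.5 * H + 0.5, 0.5 * H - 0.5, H).reshape(H, 1).repeat(W, 1)
    planar = dist * F / np.sqrt(x ** 2 + y ** 2 + F ** 2)

    print(round(float(planar[H // 2, W // 2]), 3))  # ~5.0 at the image centre
    print(round(float(planar[0, 0]), 3))            # ~4.06, smaller towards the corner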
metric_depth/zoedepth/data/ibims.py ADDED
@@ -0,0 +1,81 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms as T
32
+
33
+
34
+ class iBims(Dataset):
35
+ def __init__(self, config):
36
+ root_folder = config.ibims_root
37
+ with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f:
38
+ imglist = f.read().split()
39
+
40
+ samples = []
41
+ for basename in imglist:
42
+ img_path = os.path.join(root_folder, 'rgb', basename + ".png")
43
+ depth_path = os.path.join(root_folder, 'depth', basename + ".png")
44
+ valid_mask_path = os.path.join(
45
+ root_folder, 'mask_invalid', basename+".png")
46
+ transp_mask_path = os.path.join(
47
+ root_folder, 'mask_transp', basename+".png")
48
+
49
+ samples.append(
50
+ (img_path, depth_path, valid_mask_path, transp_mask_path))
51
+
52
+ self.samples = samples
53
+ # self.normalize = T.Normalize(
54
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
55
+ self.normalize = lambda x : x
56
+
57
+ def __getitem__(self, idx):
58
+ img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx]
59
+
60
+ img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0
61
+ depth = np.asarray(Image.open(depth_path),
62
+ dtype=np.uint16).astype('float')*50.0/65535
63
+
64
+ mask_valid = np.asarray(Image.open(valid_mask_path))
65
+ mask_transp = np.asarray(Image.open(transp_mask_path))
66
+
67
+ # depth = depth * mask_valid * mask_transp
68
+ depth = np.where(mask_valid * mask_transp, depth, -1)
69
+
70
+ img = torch.from_numpy(img).permute(2, 0, 1)
71
+ img = self.normalize(img)
72
+ depth = torch.from_numpy(depth).unsqueeze(0)
73
+ return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims')
74
+
75
+ def __len__(self):
76
+ return len(self.samples)
77
+
78
+
79
+ def get_ibims_loader(config, batch_size=1, **kwargs):
80
+ dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs)
81
+ return dataloader
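
The iBims loader above only needs a config object exposing ibims_root, so a SimpleNamespace is enough for a quick check; the dataset path below is a placeholder and the import path is assumed:

    # Illustrative sketch only.
    from types import SimpleNamespace
    from zoedepth.data.ibims import get_ibims_loader  # assumed import path

    config = SimpleNamespace(ibims_root="datasets/ibims1_core_raw")  # placeholder path
    loader = get_ibims_loader(config, batch_size=1, num_workers=0)
    sample = next(iter(loader))
    # invalid/transparent pixels were set to -1 above, so mask them out before computing metrics
    valid = sample["depth"] > 0
    print(sample["image"].shape, sample["depth"].shape, valid.float().mean())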
metric_depth/zoedepth/data/preprocess.py ADDED
@@ -0,0 +1,154 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import numpy as np
26
+ from dataclasses import dataclass
27
+ from typing import Tuple, List
28
+
29
+ # dataclass to store the crop parameters
30
+ @dataclass
31
+ class CropParams:
32
+ top: int
33
+ bottom: int
34
+ left: int
35
+ right: int
36
+
37
+
38
+
39
+ def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams:
40
+ gray_image = np.mean(rgb_image, axis=channel_axis)
41
+ h, w = gray_image.shape
42
+
43
+
44
+ def num_value_pixels(arr):
45
+ return np.sum(np.abs(arr - value) < level_diff_threshold)
46
+
47
+ def is_above_tolerance(arr, total_pixels):
48
+ return (num_value_pixels(arr) / total_pixels) > tolerance
49
+
50
+ # Crop top border until number of value pixels become below tolerance
51
+ top = min_border
52
+ while is_above_tolerance(gray_image[top, :], w) and top < h-1:
53
+ top += 1
54
+ if top > cut_off:
55
+ break
56
+
57
+ # Crop bottom border until number of value pixels become below tolerance
58
+ bottom = h - min_border
59
+ while is_above_tolerance(gray_image[bottom, :], w) and bottom > 0:
60
+ bottom -= 1
61
+ if h - bottom > cut_off:
62
+ break
63
+
64
+ # Crop left border until number of value pixels become below tolerance
65
+ left = min_border
66
+ while is_above_tolerance(gray_image[:, left], h) and left < w-1:
67
+ left += 1
68
+ if left > cut_off:
69
+ break
70
+
71
+ # Crop right border until number of value pixels become below tolerance
72
+ right = w - min_border
73
+ while is_above_tolerance(gray_image[:, right], h) and right > 0:
74
+ right -= 1
75
+ if w - right > cut_off:
76
+ break
77
+
78
+
79
+ return CropParams(top, bottom, left, right)
80
+
81
+
82
+ def get_white_border(rgb_image, value=255, **kwargs) -> CropParams:
83
+ """Crops the white border of the RGB.
84
+
85
+ Args:
86
+ rgb: RGB image, shape (H, W, 3).
87
+ Returns:
88
+ Crop parameters.
89
+ """
90
+ if value == 255:
91
+ # assert range of values in rgb image is [0, 255]
92
+ assert np.max(rgb_image) <= 255 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 255]."
93
+ assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]."
94
+ elif value == 1:
95
+ # assert range of values in rgb image is [0, 1]
96
+ assert np.max(rgb_image) <= 1 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 1]."
97
+
98
+ return get_border_params(rgb_image, value=value, **kwargs)
99
+
100
+ def get_black_border(rgb_image, **kwargs) -> CropParams:
101
+ """Crops the black border of the RGB.
102
+
103
+ Args:
104
+ rgb: RGB image, shape (H, W, 3).
105
+
106
+ Returns:
107
+ Crop parameters.
108
+ """
109
+
110
+ return get_border_params(rgb_image, value=0, **kwargs)
111
+
112
+ def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray:
113
+ """Crops the image according to the crop parameters.
114
+
115
+ Args:
116
+ image: RGB or depth image, shape (H, W, 3) or (H, W).
117
+ crop_params: Crop parameters.
118
+
119
+ Returns:
120
+ Cropped image.
121
+ """
122
+ return image[crop_params.top:crop_params.bottom, crop_params.left:crop_params.right]
123
+
124
+ def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray]:
125
+ """Crops the images according to the crop parameters.
126
+
127
+ Args:
128
+ images: RGB or depth images, shape (H, W, 3) or (H, W).
129
+ crop_params: Crop parameters.
130
+
131
+ Returns:
132
+ Cropped images.
133
+ """
134
+ return tuple(crop_image(image, crop_params) for image in images)
135
+
136
+ def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray]:
137
+ """Crops the white and black border of the RGB and depth images.
138
+
139
+ Args:
140
+ rgb: RGB image, shape (H, W, 3). This image is used to determine the border.
141
+ other_images: The other images to crop according to the border of the RGB image.
142
+ Returns:
143
+ Cropped RGB and other images.
144
+ """
145
+ # crop black border
146
+ crop_params = get_black_border(rgb_image, tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)
147
+ cropped_images = crop_images(rgb_image, *other_images, crop_params=crop_params)
148
+
149
+ # crop white border
150
+ crop_params = get_white_border(cropped_images[0], tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)
151
+ cropped_images = crop_images(*cropped_images, crop_params=crop_params)
152
+
153
+ return cropped_images
154
+
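
A small sketch of the border-cropping helpers above on a synthetic image with a 15-pixel black frame (values in [0, 255], as the white-border assertion expects; the import path is assumed). The crop is bounded by cut_off and min_border, so the frame is removed only approximately:

    # Illustrative sketch only.
    import numpy as np
    from zoedepth.data.preprocess import crop_black_or_white_border  # assumed import path

    rgb = np.zeros((240, 320, 3), dtype=np.float32)                  # black frame
    rgb[15:-15, 15:-15] = np.random.randint(10, 250, size=(210, 290, 3))
    depth = np.random.rand(240, 320).astype(np.float32)

    rgb_c, depth_c = crop_black_or_white_border(rgb, depth)
    print(rgb.shape, "->", rgb_c.shape)      # e.g. (240, 320, 3) -> (199, 279, 3)
    print(depth.shape, "->", depth_c.shape)  # depth is cropped with the same parameters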
metric_depth/zoedepth/data/sun_rgbd_loader.py ADDED
@@ -0,0 +1,115 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
+ class ToTensor(object):
35
+ def __init__(self):
36
+ # self.normalize = transforms.Normalize(
37
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38
+ self.normalize = lambda x : x
39
+
40
+ def __call__(self, sample):
41
+ image, depth = sample['image'], sample['depth']
42
+ image = self.to_tensor(image)
43
+ image = self.normalize(image)
44
+ depth = self.to_tensor(depth)
45
+
46
+ return {'image': image, 'depth': depth, 'dataset': "sunrgbd"}
47
+
48
+ def to_tensor(self, pic):
49
+
50
+ if isinstance(pic, np.ndarray):
51
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
52
+ return img
53
+
54
+ # # handle PIL Image
55
+ if pic.mode == 'I':
56
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
57
+ elif pic.mode == 'I;16':
58
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
59
+ else:
60
+ img = torch.ByteTensor(
61
+ torch.ByteStorage.from_buffer(pic.tobytes()))
62
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
63
+ if pic.mode == 'YCbCr':
64
+ nchannel = 3
65
+ elif pic.mode == 'I;16':
66
+ nchannel = 1
67
+ else:
68
+ nchannel = len(pic.mode)
69
+ img = img.view(pic.size[1], pic.size[0], nchannel)
70
+
71
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
72
+ if isinstance(img, torch.ByteTensor):
73
+ return img.float()
74
+ else:
75
+ return img
76
+
77
+
78
+ class SunRGBD(Dataset):
79
+ def __init__(self, data_dir_root):
80
+ # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze()
81
+ # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs]
82
+ # self.all_test = [os.path.join(data_dir_root, t) for t in all_test]
83
+ import glob
84
+ # self.image_files = glob.glob(
85
+ # os.path.join(data_dir_root, 'rgb', 'rgb', '*'))
86
+ # self.depth_files = [
87
+ # r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files]
88
+
89
+ self.image_files, self.depth_files = [], []
90
+ filenames = os.listdir(os.path.join(data_dir_root, 'rgb'))
91
+ for i, filename in enumerate(filenames):
92
+ self.image_files.append(os.path.join(data_dir_root, 'rgb', filename))
93
+ base_num = int(filename.replace('.jpg', '').replace('img-', ''))
94
+ self.depth_files.append(os.path.join(data_dir_root, 'depth', str(base_num) + '.png'))
95
+
96
+ self.transform = ToTensor()
97
+
98
+ def __getitem__(self, idx):
99
+ image_path = self.image_files[idx]
100
+ depth_path = self.depth_files[idx]
101
+
102
+ image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
103
+ depth = np.asarray(Image.open(depth_path), dtype='uint16') / 10000.0
104
+ # print(depth, depth.min(), depth.max())
105
+ depth[depth > 8] = -1
106
+ depth = depth[..., None]
107
+ return self.transform(dict(image=image, depth=depth))
108
+
109
+ def __len__(self):
110
+ return len(self.image_files)
111
+
112
+
113
+ def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs):
114
+ dataset = SunRGBD(data_dir_root)
115
+ return DataLoader(dataset, batch_size, **kwargs)
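
The rgb/depth pairing in SunRGBD.__init__ above relies purely on filenames (img-<n>.jpg under rgb/ is matched with <n>.png under depth/); a standalone illustration of that mapping:

    # Standalone illustration of the filename pairing used above.
    def sunrgbd_depth_name(rgb_filename: str) -> str:
        base_num = int(rgb_filename.replace('.jpg', '').replace('img-', ''))
        return f"{base_num}.png"

    print(sunrgbd_depth_name("img-000123.jpg"))  # 123.png
    print(sunrgbd_depth_name("img-5.jpg"))       # 5.png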
metric_depth/zoedepth/data/transforms.py ADDED
@@ -0,0 +1,481 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import math
26
+ import random
27
+
28
+ import cv2
29
+ import numpy as np
30
+
31
+
32
+ class RandomFliplr(object):
33
+ """Horizontal flip of the sample with given probability.
34
+ """
35
+
36
+ def __init__(self, probability=0.5):
37
+ """Init.
38
+
39
+ Args:
40
+ probability (float, optional): Flip probability. Defaults to 0.5.
41
+ """
42
+ self.__probability = probability
43
+
44
+ def __call__(self, sample):
45
+ prob = random.random()
46
+
47
+ if prob < self.__probability:
48
+ for k, v in sample.items():
49
+ if len(v.shape) >= 2:
50
+ sample[k] = np.fliplr(v).copy()
51
+
52
+ return sample
53
+
54
+
55
+ def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
56
+ """Rezise the sample to ensure the given size. Keeps aspect ratio.
57
+
58
+ Args:
59
+ sample (dict): sample
60
+ size (tuple): image size
61
+
62
+ Returns:
63
+ tuple: new size
64
+ """
65
+ shape = list(sample["disparity"].shape)
66
+
67
+ if shape[0] >= size[0] and shape[1] >= size[1]:
68
+ return sample
69
+
70
+ scale = [0, 0]
71
+ scale[0] = size[0] / shape[0]
72
+ scale[1] = size[1] / shape[1]
73
+
74
+ scale = max(scale)
75
+
76
+ shape[0] = math.ceil(scale * shape[0])
77
+ shape[1] = math.ceil(scale * shape[1])
78
+
79
+ # resize
80
+ sample["image"] = cv2.resize(
81
+ sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
82
+ )
83
+
84
+ sample["disparity"] = cv2.resize(
85
+ sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
86
+ )
87
+ sample["mask"] = cv2.resize(
88
+ sample["mask"].astype(np.float32),
89
+ tuple(shape[::-1]),
90
+ interpolation=cv2.INTER_NEAREST,
91
+ )
92
+ sample["mask"] = sample["mask"].astype(bool)
93
+
94
+ return tuple(shape)
95
+
96
+
97
+ class RandomCrop(object):
98
+ """Get a random crop of the sample with the given size (width, height).
99
+ """
100
+
101
+ def __init__(
102
+ self,
103
+ width,
104
+ height,
105
+ resize_if_needed=False,
106
+ image_interpolation_method=cv2.INTER_AREA,
107
+ ):
108
+ """Init.
109
+
110
+ Args:
111
+ width (int): output width
112
+ height (int): output height
113
+ resize_if_needed (bool, optional): If True, sample might be upsampled to ensure
114
+ that a crop of size (width, height) is possible. Defaults to False.
115
+ """
116
+ self.__size = (height, width)
117
+ self.__resize_if_needed = resize_if_needed
118
+ self.__image_interpolation_method = image_interpolation_method
119
+
120
+ def __call__(self, sample):
121
+
122
+ shape = sample["disparity"].shape
123
+
124
+ if self.__size[0] > shape[0] or self.__size[1] > shape[1]:
125
+ if self.__resize_if_needed:
126
+ shape = apply_min_size(
127
+ sample, self.__size, self.__image_interpolation_method
128
+ )
129
+ else:
130
+ raise Exception(
131
+ "Output size {} bigger than input size {}.".format(
132
+ self.__size, shape
133
+ )
134
+ )
135
+
136
+ offset = (
137
+ np.random.randint(shape[0] - self.__size[0] + 1),
138
+ np.random.randint(shape[1] - self.__size[1] + 1),
139
+ )
140
+
141
+ for k, v in sample.items():
142
+ if k == "code" or k == "basis":
143
+ continue
144
+
145
+ if len(sample[k].shape) >= 2:
146
+ sample[k] = v[
147
+ offset[0]: offset[0] + self.__size[0],
148
+ offset[1]: offset[1] + self.__size[1],
149
+ ]
150
+
151
+ return sample
152
+
153
+
154
+ class Resize(object):
155
+ """Resize sample to given size (width, height).
156
+ """
157
+
158
+ def __init__(
159
+ self,
160
+ width,
161
+ height,
162
+ resize_target=True,
163
+ keep_aspect_ratio=False,
164
+ ensure_multiple_of=1,
165
+ resize_method="lower_bound",
166
+ image_interpolation_method=cv2.INTER_AREA,
167
+ letter_box=False,
168
+ ):
169
+ """Init.
170
+
171
+ Args:
172
+ width (int): desired output width
173
+ height (int): desired output height
174
+ resize_target (bool, optional):
175
+ True: Resize the full sample (image, mask, target).
176
+ False: Resize image only.
177
+ Defaults to True.
178
+ keep_aspect_ratio (bool, optional):
179
+ True: Keep the aspect ratio of the input sample.
180
+ Output sample might not have the given width and height, and
181
+ resize behaviour depends on the parameter 'resize_method'.
182
+ Defaults to False.
183
+ ensure_multiple_of (int, optional):
184
+ Output width and height is constrained to be multiple of this parameter.
185
+ Defaults to 1.
186
+ resize_method (str, optional):
187
+ "lower_bound": Output will be at least as large as the given size.
188
+ "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
189
+ "minimal": Scale as least as possible. (Output size might be smaller than given size.)
190
+ Defaults to "lower_bound".
191
+ """
192
+ self.__width = width
193
+ self.__height = height
194
+
195
+ self.__resize_target = resize_target
196
+ self.__keep_aspect_ratio = keep_aspect_ratio
197
+ self.__multiple_of = ensure_multiple_of
198
+ self.__resize_method = resize_method
199
+ self.__image_interpolation_method = image_interpolation_method
200
+ self.__letter_box = letter_box
201
+
202
+ def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
203
+ y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
204
+
205
+ if max_val is not None and y > max_val:
206
+ y = (np.floor(x / self.__multiple_of)
207
+ * self.__multiple_of).astype(int)
208
+
209
+ if y < min_val:
210
+ y = (np.ceil(x / self.__multiple_of)
211
+ * self.__multiple_of).astype(int)
212
+
213
+ return y
214
+
215
+ def get_size(self, width, height):
216
+ # determine new height and width
217
+ scale_height = self.__height / height
218
+ scale_width = self.__width / width
219
+
220
+ if self.__keep_aspect_ratio:
221
+ if self.__resize_method == "lower_bound":
222
+ # scale such that output size is lower bound
223
+ if scale_width > scale_height:
224
+ # fit width
225
+ scale_height = scale_width
226
+ else:
227
+ # fit height
228
+ scale_width = scale_height
229
+ elif self.__resize_method == "upper_bound":
230
+ # scale such that output size is upper bound
231
+ if scale_width < scale_height:
232
+ # fit width
233
+ scale_height = scale_width
234
+ else:
235
+ # fit height
236
+ scale_width = scale_height
237
+ elif self.__resize_method == "minimal":
238
+ # scale as little as possible
239
+ if abs(1 - scale_width) < abs(1 - scale_height):
240
+ # fit width
241
+ scale_height = scale_width
242
+ else:
243
+ # fit height
244
+ scale_width = scale_height
245
+ else:
246
+ raise ValueError(
247
+ f"resize_method {self.__resize_method} not implemented"
248
+ )
249
+
250
+ if self.__resize_method == "lower_bound":
251
+ new_height = self.constrain_to_multiple_of(
252
+ scale_height * height, min_val=self.__height
253
+ )
254
+ new_width = self.constrain_to_multiple_of(
255
+ scale_width * width, min_val=self.__width
256
+ )
257
+ elif self.__resize_method == "upper_bound":
258
+ new_height = self.constrain_to_multiple_of(
259
+ scale_height * height, max_val=self.__height
260
+ )
261
+ new_width = self.constrain_to_multiple_of(
262
+ scale_width * width, max_val=self.__width
263
+ )
264
+ elif self.__resize_method == "minimal":
265
+ new_height = self.constrain_to_multiple_of(scale_height * height)
266
+ new_width = self.constrain_to_multiple_of(scale_width * width)
267
+ else:
268
+ raise ValueError(
269
+ f"resize_method {self.__resize_method} not implemented")
270
+
271
+ return (new_width, new_height)
272
+
273
+ def make_letter_box(self, sample):
274
+ top = bottom = (self.__height - sample.shape[0]) // 2
275
+ left = right = (self.__width - sample.shape[1]) // 2
276
+ sample = cv2.copyMakeBorder(
277
+ sample, top, bottom, left, right, cv2.BORDER_CONSTANT, None, 0)
278
+ return sample
279
+
280
+ def __call__(self, sample):
281
+ width, height = self.get_size(
282
+ sample["image"].shape[1], sample["image"].shape[0]
283
+ )
284
+
285
+ # resize sample
286
+ sample["image"] = cv2.resize(
287
+ sample["image"],
288
+ (width, height),
289
+ interpolation=self.__image_interpolation_method,
290
+ )
291
+
292
+ if self.__letter_box:
293
+ sample["image"] = self.make_letter_box(sample["image"])
294
+
295
+ if self.__resize_target:
296
+ if "disparity" in sample:
297
+ sample["disparity"] = cv2.resize(
298
+ sample["disparity"],
299
+ (width, height),
300
+ interpolation=cv2.INTER_NEAREST,
301
+ )
302
+
303
+ if self.__letter_box:
304
+ sample["disparity"] = self.make_letter_box(
305
+ sample["disparity"])
306
+
307
+ if "depth" in sample:
308
+ sample["depth"] = cv2.resize(
309
+ sample["depth"], (width,
310
+ height), interpolation=cv2.INTER_NEAREST
311
+ )
312
+
313
+ if self.__letter_box:
314
+ sample["depth"] = self.make_letter_box(sample["depth"])
315
+
316
+ sample["mask"] = cv2.resize(
317
+ sample["mask"].astype(np.float32),
318
+ (width, height),
319
+ interpolation=cv2.INTER_NEAREST,
320
+ )
321
+
322
+ if self.__letter_box:
323
+ sample["mask"] = self.make_letter_box(sample["mask"])
324
+
325
+ sample["mask"] = sample["mask"].astype(bool)
326
+
327
+ return sample
328
+
329
+
330
+ class ResizeFixed(object):
331
+ def __init__(self, size):
332
+ self.__size = size
333
+
334
+ def __call__(self, sample):
335
+ sample["image"] = cv2.resize(
336
+ sample["image"], self.__size[::-1], interpolation=cv2.INTER_LINEAR
337
+ )
338
+
339
+ sample["disparity"] = cv2.resize(
340
+ sample["disparity"], self.__size[::-
341
+ 1], interpolation=cv2.INTER_NEAREST
342
+ )
343
+
344
+ sample["mask"] = cv2.resize(
345
+ sample["mask"].astype(np.float32),
346
+ self.__size[::-1],
347
+ interpolation=cv2.INTER_NEAREST,
348
+ )
349
+ sample["mask"] = sample["mask"].astype(bool)
350
+
351
+ return sample
352
+
353
+
354
+ class Rescale(object):
355
+ """Rescale target values to the interval [0, max_val].
356
+ If input is constant, values are set to max_val / 2.
357
+ """
358
+
359
+ def __init__(self, max_val=1.0, use_mask=True):
360
+ """Init.
361
+
362
+ Args:
363
+ max_val (float, optional): Max output value. Defaults to 1.0.
364
+ use_mask (bool, optional): Only operate on valid pixels (mask == True). Defaults to True.
365
+ """
366
+ self.__max_val = max_val
367
+ self.__use_mask = use_mask
368
+
369
+ def __call__(self, sample):
370
+ disp = sample["disparity"]
371
+
372
+ if self.__use_mask:
373
+ mask = sample["mask"]
374
+ else:
375
+ mask = np.ones_like(disp, dtype=bool)
376
+
377
+ if np.sum(mask) == 0:
378
+ return sample
379
+
380
+ min_val = np.min(disp[mask])
381
+ max_val = np.max(disp[mask])
382
+
383
+ if max_val > min_val:
384
+ sample["disparity"][mask] = (
385
+ (disp[mask] - min_val) / (max_val - min_val) * self.__max_val
386
+ )
387
+ else:
388
+ sample["disparity"][mask] = np.ones_like(
389
+ disp[mask]) * self.__max_val / 2.0
390
+
391
+ return sample
392
+
393
+
394
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
395
+ class NormalizeImage(object):
396
+ """Normlize image by given mean and std.
397
+ """
398
+
399
+ def __init__(self, mean, std):
400
+ self.__mean = mean
401
+ self.__std = std
402
+
403
+ def __call__(self, sample):
404
+ sample["image"] = (sample["image"] - self.__mean) / self.__std
405
+
406
+ return sample
407
+
408
+
409
+ class DepthToDisparity(object):
410
+ """Convert depth to disparity. Removes depth from sample.
411
+ """
412
+
413
+ def __init__(self, eps=1e-4):
414
+ self.__eps = eps
415
+
416
+ def __call__(self, sample):
417
+ assert "depth" in sample
418
+
419
+ sample["mask"][sample["depth"] < self.__eps] = False
420
+
421
+ sample["disparity"] = np.zeros_like(sample["depth"])
422
+ sample["disparity"][sample["depth"] >= self.__eps] = (
423
+ 1.0 / sample["depth"][sample["depth"] >= self.__eps]
424
+ )
425
+
426
+ del sample["depth"]
427
+
428
+ return sample
429
+
430
+
431
+ class DisparityToDepth(object):
432
+ """Convert disparity to depth. Removes disparity from sample.
433
+ """
434
+
435
+ def __init__(self, eps=1e-4):
436
+ self.__eps = eps
437
+
438
+ def __call__(self, sample):
439
+ assert "disparity" in sample
440
+
441
+ disp = np.abs(sample["disparity"])
442
+ sample["mask"][disp < self.__eps] = False
443
+
444
+ # print(sample["disparity"])
445
+ # print(sample["mask"].sum())
446
+ # exit()
447
+
448
+ sample["depth"] = np.zeros_like(disp)
449
+ sample["depth"][disp >= self.__eps] = (
450
+ 1.0 / disp[disp >= self.__eps]
451
+ )
452
+
453
+ del sample["disparity"]
454
+
455
+ return sample
456
+
457
+
458
+ class PrepareForNet(object):
459
+ """Prepare sample for usage as network input.
460
+ """
461
+
462
+ def __init__(self):
463
+ pass
464
+
465
+ def __call__(self, sample):
466
+ image = np.transpose(sample["image"], (2, 0, 1))
467
+ sample["image"] = np.ascontiguousarray(image).astype(np.float32)
468
+
469
+ if "mask" in sample:
470
+ sample["mask"] = sample["mask"].astype(np.float32)
471
+ sample["mask"] = np.ascontiguousarray(sample["mask"])
472
+
473
+ if "disparity" in sample:
474
+ disparity = sample["disparity"].astype(np.float32)
475
+ sample["disparity"] = np.ascontiguousarray(disparity)
476
+
477
+ if "depth" in sample:
478
+ depth = sample["depth"].astype(np.float32)
479
+ sample["depth"] = np.ascontiguousarray(depth)
480
+
481
+ return sample
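
A minimal sketch chaining Resize, NormalizeImage and PrepareForNet from above on a random sample; the dict needs at least image, disparity and mask keys for Resize with resize_target=True, and the import path and target size are assumptions:

    # Illustrative sketch only.
    import numpy as np
    from zoedepth.data.transforms import Resize, NormalizeImage, PrepareForNet  # assumed import path

    sample = {
        "image": np.random.rand(480, 640, 3).astype(np.float32),
        "disparity": np.random.rand(480, 640).astype(np.float32),
        "mask": np.ones((480, 640), dtype=bool),
    }
    resize = Resize(384, 384, resize_target=True, keep_aspect_ratio=True,
                    ensure_multiple_of=32, resize_method="lower_bound")
    normalize = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    prepare = PrepareForNet()

    out = prepare(normalize(resize(sample)))
    print(out["image"].shape)                         # (3, 384, 512): height fit to 384, width kept proportional
    print(out["disparity"].shape, out["mask"].dtype)  # (384, 512), float32 mask ready for the network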
metric_depth/zoedepth/data/vkitti.py ADDED
@@ -0,0 +1,151 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ from torch.utils.data import Dataset, DataLoader
27
+ from torchvision import transforms
28
+ import os
29
+
30
+ from PIL import Image
31
+ import numpy as np
32
+ import cv2
33
+
34
+
35
+ class ToTensor(object):
36
+ def __init__(self):
37
+ self.normalize = transforms.Normalize(
38
+ mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
39
+ # self.resize = transforms.Resize((375, 1242))
40
+
41
+ def __call__(self, sample):
42
+ image, depth = sample['image'], sample['depth']
43
+
44
+ image = self.to_tensor(image)
45
+ image = self.normalize(image)
46
+ depth = self.to_tensor(depth)
47
+
48
+ # image = self.resize(image)
49
+
50
+ return {'image': image, 'depth': depth, 'dataset': "vkitti"}
51
+
52
+ def to_tensor(self, pic):
53
+
54
+ if isinstance(pic, np.ndarray):
55
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
56
+ return img
57
+
58
+ # # handle PIL Image
59
+ if pic.mode == 'I':
60
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
61
+ elif pic.mode == 'I;16':
62
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
63
+ else:
64
+ img = torch.ByteTensor(
65
+ torch.ByteStorage.from_buffer(pic.tobytes()))
66
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
67
+ if pic.mode == 'YCbCr':
68
+ nchannel = 3
69
+ elif pic.mode == 'I;16':
70
+ nchannel = 1
71
+ else:
72
+ nchannel = len(pic.mode)
73
+ img = img.view(pic.size[1], pic.size[0], nchannel)
74
+
75
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
76
+ if isinstance(img, torch.ByteTensor):
77
+ return img.float()
78
+ else:
79
+ return img
80
+
81
+
82
+ class VKITTI(Dataset):
83
+ def __init__(self, data_dir_root, do_kb_crop=True):
84
+ import glob
85
+ # image paths are of the form <data_dir_root>/test_color/*.png, with depths under <data_dir_root>/test_depth/
86
+ self.image_files = glob.glob(os.path.join(
87
+ data_dir_root, "test_color", '*.png'))
88
+ self.depth_files = [r.replace("test_color", "test_depth")
89
+ for r in self.image_files]
90
+ self.do_kb_crop = do_kb_crop
91
+ self.transform = ToTensor()
92
+
93
+ def __getitem__(self, idx):
94
+ image_path = self.image_files[idx]
95
+ depth_path = self.depth_files[idx]
96
+
97
+ image = Image.open(image_path)
98
+ # depth = Image.open(depth_path)  # unused: depth is re-read below with cv2 to preserve 16-bit values
99
+ depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
100
+ cv2.IMREAD_ANYDEPTH)
101
+ print("depth min max", depth.min(), depth.max())
102
+
103
+ # print(np.shape(image))
104
+ # print(np.shape(depth))
105
+
106
+ # depth[depth > 8] = -1
107
+
108
+ if self.do_kb_crop and False:  # KB crop intentionally disabled for this loader
109
+ height = image.height
110
+ width = image.width
111
+ top_margin = int(height - 352)
112
+ left_margin = int((width - 1216) / 2)
113
+ depth = depth.crop(
114
+ (left_margin, top_margin, left_margin + 1216, top_margin + 352))
115
+ image = image.crop(
116
+ (left_margin, top_margin, left_margin + 1216, top_margin + 352))
117
+ # uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216]
118
+
119
+ image = np.asarray(image, dtype=np.float32) / 255.0
120
+ # depth = np.asarray(depth, dtype=np.uint16) /1.
121
+ depth = depth[..., None]
122
+ sample = dict(image=image, depth=depth)
123
+
124
+ # return sample
125
+ sample = self.transform(sample)
126
+
127
+ if idx == 0:
128
+ print(sample["image"].shape)
129
+
130
+ return sample
131
+
132
+ def __len__(self):
133
+ return len(self.image_files)
134
+
135
+
136
+ def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs):
137
+ dataset = VKITTI(data_dir_root)
138
+ return DataLoader(dataset, batch_size, **kwargs)
139
+
140
+
141
+ if __name__ == "__main__":
142
+ loader = get_vkitti_loader(
143
+ data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test")
144
+ print("Total files", len(loader.dataset))
145
+ for i, sample in enumerate(loader):
146
+ print(sample["image"].shape)
147
+ print(sample["depth"].shape)
148
+ print(sample["dataset"])
149
+ print(sample['depth'].min(), sample['depth'].max())
150
+ if i > 5:
151
+ break
metric_depth/zoedepth/data/vkitti2.py ADDED
@@ -0,0 +1,187 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import cv2
28
+ import numpy as np
29
+ import torch
30
+ from PIL import Image
31
+ from torch.utils.data import DataLoader, Dataset
32
+ from torchvision import transforms
33
+
34
+
35
+ class ToTensor(object):
36
+ def __init__(self):
37
+ # self.normalize = transforms.Normalize(
38
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
39
+ self.normalize = lambda x: x
40
+ # self.resize = transforms.Resize((375, 1242))
41
+
42
+ def __call__(self, sample):
43
+ image, depth = sample['image'], sample['depth']
44
+
45
+ image = self.to_tensor(image)
46
+ image = self.normalize(image)
47
+ depth = self.to_tensor(depth)
48
+
49
+ # image = self.resize(image)
50
+
51
+ return {'image': image, 'depth': depth, 'dataset': "vkitti"}
52
+
53
+ def to_tensor(self, pic):
54
+
55
+ if isinstance(pic, np.ndarray):
56
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
57
+ return img
58
+
59
+ # # handle PIL Image
60
+ if pic.mode == 'I':
61
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
62
+ elif pic.mode == 'I;16':
63
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
64
+ else:
65
+ img = torch.ByteTensor(
66
+ torch.ByteStorage.from_buffer(pic.tobytes()))
67
+ # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
68
+ if pic.mode == 'YCbCr':
69
+ nchannel = 3
70
+ elif pic.mode == 'I;16':
71
+ nchannel = 1
72
+ else:
73
+ nchannel = len(pic.mode)
74
+ img = img.view(pic.size[1], pic.size[0], nchannel)
75
+
76
+ img = img.transpose(0, 1).transpose(0, 2).contiguous()
77
+ if isinstance(img, torch.ByteTensor):
78
+ return img.float()
79
+ else:
80
+ return img
81
+
82
+
83
+ class VKITTI2(Dataset):
84
+ def __init__(self, data_dir_root, do_kb_crop=True, split="test"):
85
+ import glob
86
+
87
+ # image paths are of the form <data_dir_root>/rgb/<scene>/<variant>/frames/<rgb,depth>/Camera<0,1>/rgb_{}.jpg
88
+ self.image_files = glob.glob(os.path.join(
89
+ data_dir_root, "**", "frames", "rgb", "Camera_0", '*.jpg'), recursive=True)
90
+ self.depth_files = [r.replace("/rgb/", "/depth/").replace(
91
+ "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]
92
+ self.do_kb_crop = do_kb_crop
93
+ self.transform = ToTensor()
94
+
95
+ # If train test split is not created, then create one.
96
+ # Split is such that 8% of the frames from each scene are used for testing.
97
+ if not os.path.exists(os.path.join(data_dir_root, "train.txt")):
98
+ import random
99
+ scenes = set([os.path.basename(os.path.dirname(
100
+ os.path.dirname(os.path.dirname(f)))) for f in self.image_files])
101
+ train_files = []
102
+ test_files = []
103
+ for scene in scenes:
104
+ scene_files = [f for f in self.image_files if os.path.basename(
105
+ os.path.dirname(os.path.dirname(os.path.dirname(f)))) == scene]
106
+ random.shuffle(scene_files)
107
+ train_files.extend(scene_files[:int(len(scene_files) * 0.92)])
108
+ test_files.extend(scene_files[int(len(scene_files) * 0.92):])
109
+ with open(os.path.join(data_dir_root, "train.txt"), "w") as f:
110
+ f.write("\n".join(train_files))
111
+ with open(os.path.join(data_dir_root, "test.txt"), "w") as f:
112
+ f.write("\n".join(test_files))
113
+
114
+ if split == "train":
115
+ with open(os.path.join(data_dir_root, "train.txt"), "r") as f:
116
+ self.image_files = f.read().splitlines()
117
+ self.depth_files = [r.replace("/rgb/", "/depth/").replace(
118
+ "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]
119
+ elif split == "test":
120
+ with open(os.path.join(data_dir_root, "test.txt"), "r") as f:
121
+ self.image_files = f.read().splitlines()
122
+ self.depth_files = [r.replace("/rgb/", "/depth/").replace(
123
+ "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]
124
+
125
+ def __getitem__(self, idx):
126
+ image_path = self.image_files[idx]
127
+ depth_path = self.depth_files[idx]
128
+
129
+ image = Image.open(image_path)
130
+ # depth = Image.open(depth_path)
131
+ depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
132
+ cv2.IMREAD_ANYDEPTH) / 100.0 # cm to m
133
+ depth = Image.fromarray(depth)
134
+ # print("depth min max", depth.min(), depth.max())
135
+
136
+ # print(np.shape(image))
137
+ # print(np.shape(depth))
138
+
139
+ if self.do_kb_crop:
140
+ if idx == 0:
141
+ print("Using KB input crop")
142
+ height = image.height
143
+ width = image.width
144
+ top_margin = int(height - 352)
145
+ left_margin = int((width - 1216) / 2)
146
+ depth = depth.crop(
147
+ (left_margin, top_margin, left_margin + 1216, top_margin + 352))
148
+ image = image.crop(
149
+ (left_margin, top_margin, left_margin + 1216, top_margin + 352))
150
+ # uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216]
151
+
152
+ image = np.asarray(image, dtype=np.float32) / 255.0
153
+ # depth = np.asarray(depth, dtype=np.uint16) /1.
154
+ depth = np.asarray(depth, dtype=np.float32) / 1.
155
+ depth[depth > 80] = -1
156
+
157
+ depth = depth[..., None]
158
+ sample = dict(image=image, depth=depth)
159
+
160
+ # return sample
161
+ sample = self.transform(sample)
162
+
163
+ if idx == 0:
164
+ print(sample["image"].shape)
165
+
166
+ return sample
167
+
168
+ def __len__(self):
169
+ return len(self.image_files)
170
+
171
+
172
+ def get_vkitti2_loader(data_dir_root, batch_size=1, **kwargs):
173
+ dataset = VKITTI2(data_dir_root)
174
+ return DataLoader(dataset, batch_size, **kwargs)
175
+
176
+
177
+ if __name__ == "__main__":
178
+ loader = get_vkitti2_loader(
179
+ data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti2")
180
+ print("Total files", len(loader.dataset))
181
+ for i, sample in enumerate(loader):
182
+ print(sample["image"].shape)
183
+ print(sample["depth"].shape)
184
+ print(sample["dataset"])
185
+ print(sample['depth'].min(), sample['depth'].max())
186
+ if i > 5:
187
+ break
metric_depth/zoedepth/models/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
metric_depth/zoedepth/models/base_models/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
metric_depth/zoedepth/models/base_models/depth_anything.py ADDED
@@ -0,0 +1,376 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+ import numpy as np
28
+ from torchvision.transforms import Normalize
29
+ from zoedepth.models.base_models.dpt_dinov2.dpt import DPT_DINOv2
30
+
31
+
32
+ def denormalize(x):
33
+ """Reverses the imagenet normalization applied to the input.
34
+
35
+ Args:
36
+ x (torch.Tensor - shape(N,3,H,W)): input tensor
37
+
38
+ Returns:
39
+ torch.Tensor - shape(N,3,H,W): Denormalized input
40
+ """
41
+ mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device)
42
+ std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device)
43
+ return x * std + mean
44
+
45
+ def get_activation(name, bank):
46
+ def hook(model, input, output):
47
+ bank[name] = output
48
+ return hook
49
+
50
+
51
+ class Resize(object):
52
+ """Resize sample to given size (width, height).
53
+ """
54
+
55
+ def __init__(
56
+ self,
57
+ width,
58
+ height,
59
+ resize_target=True,
60
+ keep_aspect_ratio=False,
61
+ ensure_multiple_of=1,
62
+ resize_method="lower_bound",
63
+ ):
64
+ """Init.
65
+ Args:
66
+ width (int): desired output width
67
+ height (int): desired output height
68
+ resize_target (bool, optional):
69
+ True: Resize the full sample (image, mask, target).
70
+ False: Resize image only.
71
+ Defaults to True.
72
+ keep_aspect_ratio (bool, optional):
73
+ True: Keep the aspect ratio of the input sample.
74
+ Output sample might not have the given width and height, and
75
+ resize behaviour depends on the parameter 'resize_method'.
76
+ Defaults to False.
77
+ ensure_multiple_of (int, optional):
78
+ Output width and height are constrained to be a multiple of this parameter.
79
+ Defaults to 1.
80
+ resize_method (str, optional):
81
+ "lower_bound": Output will be at least as large as the given size.
82
+ "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
83
+ "minimal": Scale as little as possible. (Output size might be smaller than given size.)
84
+ Defaults to "lower_bound".
85
+ """
86
+ print("Params passed to Resize transform:")
87
+ print("\twidth: ", width)
88
+ print("\theight: ", height)
89
+ print("\tresize_target: ", resize_target)
90
+ print("\tkeep_aspect_ratio: ", keep_aspect_ratio)
91
+ print("\tensure_multiple_of: ", ensure_multiple_of)
92
+ print("\tresize_method: ", resize_method)
93
+
94
+ self.__width = width
95
+ self.__height = height
96
+
97
+ self.__keep_aspect_ratio = keep_aspect_ratio
98
+ self.__multiple_of = ensure_multiple_of
99
+ self.__resize_method = resize_method
100
+
101
+ def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
102
+ y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
103
+
104
+ if max_val is not None and y > max_val:
105
+ y = (np.floor(x / self.__multiple_of)
106
+ * self.__multiple_of).astype(int)
107
+
108
+ if y < min_val:
109
+ y = (np.ceil(x / self.__multiple_of)
110
+ * self.__multiple_of).astype(int)
111
+
112
+ return y
113
+
114
+ def get_size(self, width, height):
115
+ # determine new height and width
116
+ scale_height = self.__height / height
117
+ scale_width = self.__width / width
118
+
119
+ if self.__keep_aspect_ratio:
120
+ if self.__resize_method == "lower_bound":
121
+ # scale such that output size is lower bound
122
+ if scale_width > scale_height:
123
+ # fit width
124
+ scale_height = scale_width
125
+ else:
126
+ # fit height
127
+ scale_width = scale_height
128
+ elif self.__resize_method == "upper_bound":
129
+ # scale such that output size is upper bound
130
+ if scale_width < scale_height:
131
+ # fit width
132
+ scale_height = scale_width
133
+ else:
134
+ # fit height
135
+ scale_width = scale_height
136
+ elif self.__resize_method == "minimal":
137
+ # scale as little as possible
138
+ if abs(1 - scale_width) < abs(1 - scale_height):
139
+ # fit width
140
+ scale_height = scale_width
141
+ else:
142
+ # fit height
143
+ scale_width = scale_height
144
+ else:
145
+ raise ValueError(
146
+ f"resize_method {self.__resize_method} not implemented"
147
+ )
148
+
149
+ if self.__resize_method == "lower_bound":
150
+ new_height = self.constrain_to_multiple_of(
151
+ scale_height * height, min_val=self.__height
152
+ )
153
+ new_width = self.constrain_to_multiple_of(
154
+ scale_width * width, min_val=self.__width
155
+ )
156
+ elif self.__resize_method == "upper_bound":
157
+ new_height = self.constrain_to_multiple_of(
158
+ scale_height * height, max_val=self.__height
159
+ )
160
+ new_width = self.constrain_to_multiple_of(
161
+ scale_width * width, max_val=self.__width
162
+ )
163
+ elif self.__resize_method == "minimal":
164
+ new_height = self.constrain_to_multiple_of(scale_height * height)
165
+ new_width = self.constrain_to_multiple_of(scale_width * width)
166
+ else:
167
+ raise ValueError(
168
+ f"resize_method {self.__resize_method} not implemented")
169
+
170
+ return (new_width, new_height)
171
+
172
+ def __call__(self, x):
173
+ width, height = self.get_size(*x.shape[-2:][::-1])
174
+ return nn.functional.interpolate(x, (height, width), mode='bilinear', align_corners=True)
175
+
176
+ class PrepForMidas(object):
177
+ def __init__(self, resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True):
178
+ if isinstance(img_size, int):
179
+ img_size = (img_size, img_size)
180
+ net_h, net_w = img_size
181
+ # self.normalization = Normalize(
182
+ # mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
183
+ self.normalization = Normalize(
184
+ mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
185
+ self.resizer = Resize(net_w, net_h, keep_aspect_ratio=keep_aspect_ratio, ensure_multiple_of=14, resize_method=resize_mode) \
186
+ if do_resize else nn.Identity()
187
+
188
+ def __call__(self, x):
189
+ return self.normalization(self.resizer(x))
190
+
191
+
192
+ class DepthAnythingCore(nn.Module):
193
+ def __init__(self, midas, trainable=False, fetch_features=True, layer_names=('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'), freeze_bn=False, keep_aspect_ratio=True,
194
+ img_size=384, **kwargs):
195
+ """Midas Base model used for multi-scale feature extraction.
196
+
197
+ Args:
198
+ midas (torch.nn.Module): Midas model.
199
+ trainable (bool, optional): Train midas model. Defaults to False.
200
+ fetch_features (bool, optional): Extract multi-scale features. Defaults to True.
201
+ layer_names (tuple, optional): Layers used for feature extraction. Order = (head output features, last layer features, ...decoder features). Defaults to ('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1').
202
+ freeze_bn (bool, optional): Freeze BatchNorm. Generally results in better finetuning performance. Defaults to False.
203
+ keep_aspect_ratio (bool, optional): Keep the aspect ratio of input images while resizing. Defaults to True.
204
+ img_size (int, tuple, optional): Input resolution. Defaults to 384.
205
+ """
206
+ super().__init__()
207
+ self.core = midas
208
+ self.output_channels = None
209
+ self.core_out = {}
210
+ self.trainable = trainable
211
+ self.fetch_features = fetch_features
212
+ # midas.scratch.output_conv = nn.Identity()
213
+ self.handles = []
214
+ # self.layer_names = ['out_conv','l4_rn', 'r4', 'r3', 'r2', 'r1']
215
+ self.layer_names = layer_names
216
+
217
+ self.set_trainable(trainable)
218
+ self.set_fetch_features(fetch_features)
219
+
220
+ self.prep = PrepForMidas(keep_aspect_ratio=keep_aspect_ratio,
221
+ img_size=img_size, do_resize=kwargs.get('do_resize', True))
222
+
223
+ if freeze_bn:
224
+ self.freeze_bn()
225
+
226
+ def set_trainable(self, trainable):
227
+ self.trainable = trainable
228
+ if trainable:
229
+ self.unfreeze()
230
+ else:
231
+ self.freeze()
232
+ return self
233
+
234
+ def set_fetch_features(self, fetch_features):
235
+ self.fetch_features = fetch_features
236
+ if fetch_features:
237
+ if len(self.handles) == 0:
238
+ self.attach_hooks(self.core)
239
+ else:
240
+ self.remove_hooks()
241
+ return self
242
+
243
+ def freeze(self):
244
+ for p in self.parameters():
245
+ p.requires_grad = False
246
+ self.trainable = False
247
+ return self
248
+
249
+ def unfreeze(self):
250
+ for p in self.parameters():
251
+ p.requires_grad = True
252
+ self.trainable = True
253
+ return self
254
+
255
+ def freeze_bn(self):
256
+ for m in self.modules():
257
+ if isinstance(m, nn.BatchNorm2d):
258
+ m.eval()
259
+ return self
260
+
261
+ def forward(self, x, denorm=False, return_rel_depth=False):
262
+ # print('input to midas:', x.shape)
263
+ with torch.no_grad():
264
+ if denorm:
265
+ x = denormalize(x)
266
+ x = self.prep(x)
267
+
268
+ with torch.set_grad_enabled(self.trainable):
269
+
270
+ rel_depth = self.core(x)
271
+ if not self.fetch_features:
272
+ return rel_depth
273
+ out = [self.core_out[k] for k in self.layer_names]
274
+
275
+ if return_rel_depth:
276
+ return rel_depth, out
277
+ return out
278
+
279
+ def get_rel_pos_params(self):
280
+ for name, p in self.core.pretrained.named_parameters():
281
+ if "pos_embed" in name:
282
+ yield p
283
+
284
+ def get_enc_params_except_rel_pos(self):
285
+ for name, p in self.core.pretrained.named_parameters():
286
+ if "pos_embed" not in name:
287
+ yield p
288
+
289
+ def freeze_encoder(self, freeze_rel_pos=False):
290
+ if freeze_rel_pos:
291
+ for p in self.core.pretrained.parameters():
292
+ p.requires_grad = False
293
+ else:
294
+ for p in self.get_enc_params_except_rel_pos():
295
+ p.requires_grad = False
296
+ return self
297
+
298
+ def attach_hooks(self, midas):
299
+ if len(self.handles) > 0:
300
+ self.remove_hooks()
301
+ if "out_conv" in self.layer_names:
302
+ self.handles.append(list(midas.depth_head.scratch.output_conv2.children())[
303
+ 1].register_forward_hook(get_activation("out_conv", self.core_out)))
304
+ if "r4" in self.layer_names:
305
+ self.handles.append(midas.depth_head.scratch.refinenet4.register_forward_hook(
306
+ get_activation("r4", self.core_out)))
307
+ if "r3" in self.layer_names:
308
+ self.handles.append(midas.depth_head.scratch.refinenet3.register_forward_hook(
309
+ get_activation("r3", self.core_out)))
310
+ if "r2" in self.layer_names:
311
+ self.handles.append(midas.depth_head.scratch.refinenet2.register_forward_hook(
312
+ get_activation("r2", self.core_out)))
313
+ if "r1" in self.layer_names:
314
+ self.handles.append(midas.depth_head.scratch.refinenet1.register_forward_hook(
315
+ get_activation("r1", self.core_out)))
316
+ if "l4_rn" in self.layer_names:
317
+ self.handles.append(midas.depth_head.scratch.layer4_rn.register_forward_hook(
318
+ get_activation("l4_rn", self.core_out)))
319
+
320
+ return self
321
+
322
+ def remove_hooks(self):
323
+ for h in self.handles:
324
+ h.remove()
325
+ return self
326
+
327
+ def __del__(self):
328
+ self.remove_hooks()
329
+
330
+ def set_output_channels(self):
331
+ self.output_channels = [256, 256, 256, 256, 256]
332
+
333
+ @staticmethod
334
+ def build(midas_model_type="dinov2_large", train_midas=False, use_pretrained_midas=True, fetch_features=False, freeze_bn=True, force_keep_ar=False, force_reload=False, **kwargs):
335
+ if "img_size" in kwargs:
336
+ kwargs = DepthAnythingCore.parse_img_size(kwargs)
337
+ img_size = kwargs.pop("img_size", [384, 384])
338
+
339
+ depth_anything = DPT_DINOv2(out_channels=[256, 512, 1024, 1024], use_clstoken=False)
340
+
341
+ state_dict = torch.load('./checkpoints/depth_anything_vitl14.pth', map_location='cpu')
342
+ depth_anything.load_state_dict(state_dict)
343
+
344
+ kwargs.update({'keep_aspect_ratio': force_keep_ar})
345
+
346
+ depth_anything_core = DepthAnythingCore(depth_anything, trainable=train_midas, fetch_features=fetch_features,
347
+ freeze_bn=freeze_bn, img_size=img_size, **kwargs)
348
+
349
+ depth_anything_core.set_output_channels()
350
+ return depth_anything_core
351
+
352
+ @staticmethod
353
+ def parse_img_size(config):
354
+ assert 'img_size' in config
355
+ if isinstance(config['img_size'], str):
356
+ assert "," in config['img_size'], "img_size should be a string with comma separated img_size=H,W"
357
+ config['img_size'] = list(map(int, config['img_size'].split(",")))
358
+ assert len(
359
+ config['img_size']) == 2, "img_size should be a string with comma separated img_size=H,W"
360
+ elif isinstance(config['img_size'], int):
361
+ config['img_size'] = [config['img_size'], config['img_size']]
362
+ else:
363
+ assert isinstance(config['img_size'], list) and len(
364
+ config['img_size']) == 2, "img_size should be a list of H,W"
365
+ return config
366
+
367
+
368
+ nchannels2models = {
369
+ tuple([256]*5): ["DPT_BEiT_L_384", "DPT_BEiT_L_512", "DPT_BEiT_B_384", "DPT_SwinV2_L_384", "DPT_SwinV2_B_384", "DPT_SwinV2_T_256", "DPT_Large", "DPT_Hybrid"],
370
+ (512, 256, 128, 64, 64): ["MiDaS_small"]
371
+ }
372
+
373
+ # Model name to number of output channels
374
+ MIDAS_SETTINGS = {m: k for k, v in nchannels2models.items()
375
+ for m in v
376
+ }
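Usage note (editorial, not part of the uploaded file): a small sketch of the Resize / PrepForMidas preprocessing defined in depth_anything.py above, assuming those classes are importable. With keep_aspect_ratio=True, resize_method="minimal" and ensure_multiple_of=14 (the DINOv2 patch size), a 480x640 batch is rescaled to the nearest patch-aligned resolution and ImageNet-normalized.

import torch

prep = PrepForMidas(resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True)
x = torch.rand(1, 3, 480, 640)  # N, C, H, W in [0, 1]
y = prep(x)                     # resized to multiples of 14, then normalized
print(y.shape)                  # torch.Size([1, 3, 378, 518]) for this input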
metric_depth/zoedepth/models/base_models/dpt_dinov2/blocks.py ADDED
@@ -0,0 +1,153 @@
1
+ import torch.nn as nn
2
+
3
+
4
+ def _make_scratch(in_shape, out_shape, groups=1, expand=False):
5
+ scratch = nn.Module()
6
+
7
+ out_shape1 = out_shape
8
+ out_shape2 = out_shape
9
+ out_shape3 = out_shape
10
+ if len(in_shape) >= 4:
11
+ out_shape4 = out_shape
12
+
13
+ if expand:
14
+ out_shape1 = out_shape
15
+ out_shape2 = out_shape*2
16
+ out_shape3 = out_shape*4
17
+ if len(in_shape) >= 4:
18
+ out_shape4 = out_shape*8
19
+
20
+ scratch.layer1_rn = nn.Conv2d(
21
+ in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
22
+ )
23
+ scratch.layer2_rn = nn.Conv2d(
24
+ in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
25
+ )
26
+ scratch.layer3_rn = nn.Conv2d(
27
+ in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
28
+ )
29
+ if len(in_shape) >= 4:
30
+ scratch.layer4_rn = nn.Conv2d(
31
+ in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
32
+ )
33
+
34
+ return scratch
35
+
36
+
37
+ class ResidualConvUnit(nn.Module):
38
+ """Residual convolution module.
39
+ """
40
+
41
+ def __init__(self, features, activation, bn):
42
+ """Init.
43
+
44
+ Args:
45
+ features (int): number of features
46
+ """
47
+ super().__init__()
48
+
49
+ self.bn = bn
50
+
51
+ self.groups=1
52
+
53
+ self.conv1 = nn.Conv2d(
54
+ features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
55
+ )
56
+
57
+ self.conv2 = nn.Conv2d(
58
+ features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
59
+ )
60
+
61
+ if self.bn==True:
62
+ self.bn1 = nn.BatchNorm2d(features)
63
+ self.bn2 = nn.BatchNorm2d(features)
64
+
65
+ self.activation = activation
66
+
67
+ self.skip_add = nn.quantized.FloatFunctional()
68
+
69
+ def forward(self, x):
70
+ """Forward pass.
71
+
72
+ Args:
73
+ x (tensor): input
74
+
75
+ Returns:
76
+ tensor: output
77
+ """
78
+
79
+ out = self.activation(x)
80
+ out = self.conv1(out)
81
+ if self.bn==True:
82
+ out = self.bn1(out)
83
+
84
+ out = self.activation(out)
85
+ out = self.conv2(out)
86
+ if self.bn==True:
87
+ out = self.bn2(out)
88
+
89
+ if self.groups > 1:
90
+ out = self.conv_merge(out)
91
+
92
+ return self.skip_add.add(out, x)
93
+
94
+
95
+ class FeatureFusionBlock(nn.Module):
96
+ """Feature fusion block.
97
+ """
98
+
99
+ def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None):
100
+ """Init.
101
+
102
+ Args:
103
+ features (int): number of features
104
+ """
105
+ super(FeatureFusionBlock, self).__init__()
106
+
107
+ self.deconv = deconv
108
+ self.align_corners = align_corners
109
+
110
+ self.groups=1
111
+
112
+ self.expand = expand
113
+ out_features = features
114
+ if self.expand==True:
115
+ out_features = features//2
116
+
117
+ self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
118
+
119
+ self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
120
+ self.resConfUnit2 = ResidualConvUnit(features, activation, bn)
121
+
122
+ self.skip_add = nn.quantized.FloatFunctional()
123
+
124
+ self.size=size
125
+
126
+ def forward(self, *xs, size=None):
127
+ """Forward pass.
128
+
129
+ Returns:
130
+ tensor: output
131
+ """
132
+ output = xs[0]
133
+
134
+ if len(xs) == 2:
135
+ res = self.resConfUnit1(xs[1])
136
+ output = self.skip_add.add(output, res)
137
+
138
+ output = self.resConfUnit2(output)
139
+
140
+ if (size is None) and (self.size is None):
141
+ modifier = {"scale_factor": 2}
142
+ elif size is None:
143
+ modifier = {"size": self.size}
144
+ else:
145
+ modifier = {"size": size}
146
+
147
+ output = nn.functional.interpolate(
148
+ output, **modifier, mode="bilinear", align_corners=self.align_corners
149
+ )
150
+
151
+ output = self.out_conv(output)
152
+
153
+ return output
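Usage note (editorial, not part of the uploaded file): a sketch of how _make_scratch and FeatureFusionBlock above fit together, using random feature maps in place of real backbone activations; the channel widths mirror the [256, 512, 1024, 1024] configuration used elsewhere in this upload.

import torch
import torch.nn as nn

# Project two backbone stages to a common 256-channel width, then fuse and upsample.
scratch = _make_scratch([256, 512, 1024, 1024], 256, groups=1, expand=False)
fusion = FeatureFusionBlock(256, nn.ReLU(False), bn=False, align_corners=True)

feat3 = torch.rand(1, 1024, 24, 24)  # hypothetical stage-3 features
feat4 = torch.rand(1, 1024, 12, 12)  # hypothetical stage-4 features
out = fusion(scratch.layer4_rn(feat4), size=scratch.layer3_rn(feat3).shape[2:])
print(out.shape)                     # torch.Size([1, 256, 24, 24])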
metric_depth/zoedepth/models/base_models/dpt_dinov2/dpt.py ADDED
@@ -0,0 +1,157 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from .blocks import FeatureFusionBlock, _make_scratch
5
+ import torch.nn.functional as F
6
+
7
+
8
+ def _make_fusion_block(features, use_bn, size = None):
9
+ return FeatureFusionBlock(
10
+ features,
11
+ nn.ReLU(False),
12
+ deconv=False,
13
+ bn=use_bn,
14
+ expand=False,
15
+ align_corners=True,
16
+ size=size,
17
+ )
18
+
19
+
20
+ class DPTHead(nn.Module):
21
+ def __init__(self, in_channels, features=256, use_bn=False, out_channels=[256, 512, 1024, 1024], use_clstoken=False):
22
+ super(DPTHead, self).__init__()
23
+
24
+ self.use_clstoken = use_clstoken
25
+
26
+ # out_channels = [in_channels // 8, in_channels // 4, in_channels // 2, in_channels]
27
+ # out_channels = [in_channels // 4, in_channels // 2, in_channels, in_channels]
28
+ # out_channels = [in_channels, in_channels, in_channels, in_channels]
29
+
30
+ self.projects = nn.ModuleList([
31
+ nn.Conv2d(
32
+ in_channels=in_channels,
33
+ out_channels=out_channel,
34
+ kernel_size=1,
35
+ stride=1,
36
+ padding=0,
37
+ ) for out_channel in out_channels
38
+ ])
39
+
40
+ self.resize_layers = nn.ModuleList([
41
+ nn.ConvTranspose2d(
42
+ in_channels=out_channels[0],
43
+ out_channels=out_channels[0],
44
+ kernel_size=4,
45
+ stride=4,
46
+ padding=0),
47
+ nn.ConvTranspose2d(
48
+ in_channels=out_channels[1],
49
+ out_channels=out_channels[1],
50
+ kernel_size=2,
51
+ stride=2,
52
+ padding=0),
53
+ nn.Identity(),
54
+ nn.Conv2d(
55
+ in_channels=out_channels[3],
56
+ out_channels=out_channels[3],
57
+ kernel_size=3,
58
+ stride=2,
59
+ padding=1)
60
+ ])
61
+
62
+ if use_clstoken:
63
+ self.readout_projects = nn.ModuleList()
64
+ for _ in range(len(self.projects)):
65
+ self.readout_projects.append(
66
+ nn.Sequential(
67
+ nn.Linear(2 * in_channels, in_channels),
68
+ nn.GELU()))
69
+
70
+ self.scratch = _make_scratch(
71
+ out_channels,
72
+ features,
73
+ groups=1,
74
+ expand=False,
75
+ )
76
+
77
+ self.scratch.stem_transpose = None
78
+
79
+ self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
80
+ self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
81
+ self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
82
+ self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
83
+
84
+ head_features_1 = features
85
+ head_features_2 = 32
86
+
87
+ self.scratch.output_conv1 = nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1)
88
+
89
+ self.scratch.output_conv2 = nn.Sequential(
90
+ nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
91
+ nn.ReLU(True),
92
+ nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
93
+ nn.ReLU(True),
94
+ nn.Identity(),
95
+ )
96
+
97
+ def forward(self, out_features, patch_h, patch_w):
98
+ out = []
99
+ for i, x in enumerate(out_features):
100
+ if self.use_clstoken:
101
+ x, cls_token = x[0], x[1]
102
+ readout = cls_token.unsqueeze(1).expand_as(x)
103
+ x = self.readout_projects[i](torch.cat((x, readout), -1))
104
+ else:
105
+ x = x[0]
106
+
107
+ x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))
108
+
109
+ x = self.projects[i](x)
110
+ x = self.resize_layers[i](x)
111
+
112
+ out.append(x)
113
+
114
+ layer_1, layer_2, layer_3, layer_4 = out
115
+
116
+ layer_1_rn = self.scratch.layer1_rn(layer_1)
117
+ layer_2_rn = self.scratch.layer2_rn(layer_2)
118
+ layer_3_rn = self.scratch.layer3_rn(layer_3)
119
+ layer_4_rn = self.scratch.layer4_rn(layer_4)
120
+
121
+ path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
122
+ path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
123
+ path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
124
+ path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
125
+
126
+ out = self.scratch.output_conv1(path_1)
127
+ out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
128
+ out = self.scratch.output_conv2(out)
129
+
130
+ return out
131
+
132
+
133
+ class DPT_DINOv2(nn.Module):
134
+ def __init__(self, encoder='vitl', features=256, use_bn=False, out_channels=[256, 512, 1024, 1024], use_clstoken=False):
135
+
136
+ super(DPT_DINOv2, self).__init__()
137
+
138
+ torch.manual_seed(1)
139
+
140
+ self.pretrained = torch.hub.load('../torchhub/facebookresearch_dinov2_main', 'dinov2_{:}14'.format(encoder), source='local', pretrained=False)
141
+
142
+ dim = self.pretrained.blocks[0].attn.qkv.in_features
143
+
144
+ self.depth_head = DPTHead(dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken)
145
+
146
+ def forward(self, x):
147
+ h, w = x.shape[-2:]
148
+
149
+ features = self.pretrained.get_intermediate_layers(x, 4, return_class_token=True)
150
+
151
+ patch_h, patch_w = h // 14, w // 14
152
+
153
+ depth = self.depth_head(features, patch_h, patch_w)
154
+ depth = F.interpolate(depth, size=(h, w), mode="bilinear", align_corners=True)
155
+ depth = F.relu(depth)
156
+
157
+ return depth.squeeze(1)
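Usage note (editorial, not part of the uploaded file): a sketch that drives the DPTHead above with random ViT-style tokens instead of real DINOv2 features, to show the expected input format: one (tokens, cls_token) pair per tapped layer, with a 14-pixel patch grid. All numbers here are illustrative.

import torch

head = DPTHead(in_channels=1024, features=256, use_bn=False,
               out_channels=[256, 512, 1024, 1024], use_clstoken=False)

ph, pw = 28, 37                                        # patch grid of a 392x518 input
features = [(torch.rand(1, ph * pw, 1024), None)] * 4  # (tokens, cls_token) per tapped layer
depth = head(features, ph, pw)
print(depth.shape)                                     # torch.Size([1, 1, 392, 518])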
metric_depth/zoedepth/models/base_models/midas.py ADDED
@@ -0,0 +1,380 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+ import numpy as np
28
+ from torchvision.transforms import Normalize
29
+
30
+
31
+ def denormalize(x):
32
+ """Reverses the imagenet normalization applied to the input.
33
+
34
+ Args:
35
+ x (torch.Tensor - shape(N,3,H,W)): input tensor
36
+
37
+ Returns:
38
+ torch.Tensor - shape(N,3,H,W): Denormalized input
39
+ """
40
+ mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device)
41
+ std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device)
42
+ return x * std + mean
43
+
44
+ def get_activation(name, bank):
45
+ def hook(model, input, output):
46
+ bank[name] = output
47
+ return hook
48
+
49
+
50
+ class Resize(object):
51
+ """Resize sample to given size (width, height).
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ width,
57
+ height,
58
+ resize_target=True,
59
+ keep_aspect_ratio=False,
60
+ ensure_multiple_of=1,
61
+ resize_method="lower_bound",
62
+ ):
63
+ """Init.
64
+ Args:
65
+ width (int): desired output width
66
+ height (int): desired output height
67
+ resize_target (bool, optional):
68
+ True: Resize the full sample (image, mask, target).
69
+ False: Resize image only.
70
+ Defaults to True.
71
+ keep_aspect_ratio (bool, optional):
72
+ True: Keep the aspect ratio of the input sample.
73
+ Output sample might not have the given width and height, and
74
+ resize behaviour depends on the parameter 'resize_method'.
75
+ Defaults to False.
76
+ ensure_multiple_of (int, optional):
77
+ Output width and height are constrained to be a multiple of this parameter.
78
+ Defaults to 1.
79
+ resize_method (str, optional):
80
+ "lower_bound": Output will be at least as large as the given size.
81
+ "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
82
+ "minimal": Scale as little as possible. (Output size might be smaller than given size.)
83
+ Defaults to "lower_bound".
84
+ """
85
+ print("Params passed to Resize transform:")
86
+ print("\twidth: ", width)
87
+ print("\theight: ", height)
88
+ print("\tresize_target: ", resize_target)
89
+ print("\tkeep_aspect_ratio: ", keep_aspect_ratio)
90
+ print("\tensure_multiple_of: ", ensure_multiple_of)
91
+ print("\tresize_method: ", resize_method)
92
+
93
+ self.__width = width
94
+ self.__height = height
95
+
96
+ self.__keep_aspect_ratio = keep_aspect_ratio
97
+ self.__multiple_of = ensure_multiple_of
98
+ self.__resize_method = resize_method
99
+
100
+ def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
101
+ y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
102
+
103
+ if max_val is not None and y > max_val:
104
+ y = (np.floor(x / self.__multiple_of)
105
+ * self.__multiple_of).astype(int)
106
+
107
+ if y < min_val:
108
+ y = (np.ceil(x / self.__multiple_of)
109
+ * self.__multiple_of).astype(int)
110
+
111
+ return y
112
+
113
+ def get_size(self, width, height):
114
+ # determine new height and width
115
+ scale_height = self.__height / height
116
+ scale_width = self.__width / width
117
+
118
+ if self.__keep_aspect_ratio:
119
+ if self.__resize_method == "lower_bound":
120
+ # scale such that output size is lower bound
121
+ if scale_width > scale_height:
122
+ # fit width
123
+ scale_height = scale_width
124
+ else:
125
+ # fit height
126
+ scale_width = scale_height
127
+ elif self.__resize_method == "upper_bound":
128
+ # scale such that output size is upper bound
129
+ if scale_width < scale_height:
130
+ # fit width
131
+ scale_height = scale_width
132
+ else:
133
+ # fit height
134
+ scale_width = scale_height
135
+ elif self.__resize_method == "minimal":
136
+ # scale as little as possible
137
+ if abs(1 - scale_width) < abs(1 - scale_height):
138
+ # fit width
139
+ scale_height = scale_width
140
+ else:
141
+ # fit height
142
+ scale_width = scale_height
143
+ else:
144
+ raise ValueError(
145
+ f"resize_method {self.__resize_method} not implemented"
146
+ )
147
+
148
+ if self.__resize_method == "lower_bound":
149
+ new_height = self.constrain_to_multiple_of(
150
+ scale_height * height, min_val=self.__height
151
+ )
152
+ new_width = self.constrain_to_multiple_of(
153
+ scale_width * width, min_val=self.__width
154
+ )
155
+ elif self.__resize_method == "upper_bound":
156
+ new_height = self.constrain_to_multiple_of(
157
+ scale_height * height, max_val=self.__height
158
+ )
159
+ new_width = self.constrain_to_multiple_of(
160
+ scale_width * width, max_val=self.__width
161
+ )
162
+ elif self.__resize_method == "minimal":
163
+ new_height = self.constrain_to_multiple_of(scale_height * height)
164
+ new_width = self.constrain_to_multiple_of(scale_width * width)
165
+ else:
166
+ raise ValueError(
167
+ f"resize_method {self.__resize_method} not implemented")
168
+
169
+ return (new_width, new_height)
170
+
171
+ def __call__(self, x):
172
+ width, height = self.get_size(*x.shape[-2:][::-1])
173
+ return nn.functional.interpolate(x, (height, width), mode='bilinear', align_corners=True)
174
+
175
+ class PrepForMidas(object):
176
+ def __init__(self, resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True):
177
+ if isinstance(img_size, int):
178
+ img_size = (img_size, img_size)
179
+ net_h, net_w = img_size
180
+ self.normalization = Normalize(
181
+ mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
182
+ self.resizer = Resize(net_w, net_h, keep_aspect_ratio=keep_aspect_ratio, ensure_multiple_of=32, resize_method=resize_mode) \
183
+ if do_resize else nn.Identity()
184
+
185
+ def __call__(self, x):
186
+ return self.normalization(self.resizer(x))
187
+
188
+
189
+ class MidasCore(nn.Module):
190
+ def __init__(self, midas, trainable=False, fetch_features=True, layer_names=('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'), freeze_bn=False, keep_aspect_ratio=True,
191
+ img_size=384, **kwargs):
192
+ """Midas Base model used for multi-scale feature extraction.
193
+
194
+ Args:
195
+ midas (torch.nn.Module): Midas model.
196
+ trainable (bool, optional): Train midas model. Defaults to False.
197
+ fetch_features (bool, optional): Extract multi-scale features. Defaults to True.
198
+ layer_names (tuple, optional): Layers used for feature extraction. Order = (head output features, last layer features, ...decoder features). Defaults to ('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1').
199
+ freeze_bn (bool, optional): Freeze BatchNorm. Generally results in better finetuning performance. Defaults to False.
200
+ keep_aspect_ratio (bool, optional): Keep the aspect ratio of input images while resizing. Defaults to True.
201
+ img_size (int, tuple, optional): Input resolution. Defaults to 384.
202
+ """
203
+ super().__init__()
204
+ self.core = midas
205
+ self.output_channels = None
206
+ self.core_out = {}
207
+ self.trainable = trainable
208
+ self.fetch_features = fetch_features
209
+ # midas.scratch.output_conv = nn.Identity()
210
+ self.handles = []
211
+ # self.layer_names = ['out_conv','l4_rn', 'r4', 'r3', 'r2', 'r1']
212
+ self.layer_names = layer_names
213
+
214
+ self.set_trainable(trainable)
215
+ self.set_fetch_features(fetch_features)
216
+
217
+ self.prep = PrepForMidas(keep_aspect_ratio=keep_aspect_ratio,
218
+ img_size=img_size, do_resize=kwargs.get('do_resize', True))
219
+
220
+ if freeze_bn:
221
+ self.freeze_bn()
222
+
223
+ def set_trainable(self, trainable):
224
+ self.trainable = trainable
225
+ if trainable:
226
+ self.unfreeze()
227
+ else:
228
+ self.freeze()
229
+ return self
230
+
231
+ def set_fetch_features(self, fetch_features):
232
+ self.fetch_features = fetch_features
233
+ if fetch_features:
234
+ if len(self.handles) == 0:
235
+ self.attach_hooks(self.core)
236
+ else:
237
+ self.remove_hooks()
238
+ return self
239
+
240
+ def freeze(self):
241
+ for p in self.parameters():
242
+ p.requires_grad = False
243
+ self.trainable = False
244
+ return self
245
+
246
+ def unfreeze(self):
247
+ for p in self.parameters():
248
+ p.requires_grad = True
249
+ self.trainable = True
250
+ return self
251
+
252
+ def freeze_bn(self):
253
+ for m in self.modules():
254
+ if isinstance(m, nn.BatchNorm2d):
255
+ m.eval()
256
+ return self
257
+
258
+ def forward(self, x, denorm=False, return_rel_depth=False):
259
+ # print('input to midas:', x.shape)
260
+ with torch.no_grad():
261
+ if denorm:
262
+ x = denormalize(x)
263
+ x = self.prep(x)
264
+ # print("Shape after prep: ", x.shape)
265
+ # print('pre-processed:', x.shape)
266
+
267
+ with torch.set_grad_enabled(self.trainable):
268
+
269
+ # print("Input size to Midascore", x.shape)
270
+ rel_depth = self.core(x)
271
+ # print("Output from midas shape", rel_depth.shape)
272
+ if not self.fetch_features:
273
+ return rel_depth
274
+ out = [self.core_out[k] for k in self.layer_names]
275
+
276
+ if return_rel_depth:
277
+ return rel_depth, out
278
+ return out
279
+
280
+ def get_rel_pos_params(self):
281
+ for name, p in self.core.pretrained.named_parameters():
282
+ if "relative_position" in name:
283
+ yield p
284
+
285
+ def get_enc_params_except_rel_pos(self):
286
+ for name, p in self.core.pretrained.named_parameters():
287
+ if "relative_position" not in name:
288
+ yield p
289
+
290
+ def freeze_encoder(self, freeze_rel_pos=False):
291
+ if freeze_rel_pos:
292
+ for p in self.core.pretrained.parameters():
293
+ p.requires_grad = False
294
+ else:
295
+ for p in self.get_enc_params_except_rel_pos():
296
+ p.requires_grad = False
297
+ return self
298
+
299
+ def attach_hooks(self, midas):
300
+ if len(self.handles) > 0:
301
+ self.remove_hooks()
302
+ if "out_conv" in self.layer_names:
303
+ self.handles.append(list(midas.scratch.output_conv.children())[
304
+ 3].register_forward_hook(get_activation("out_conv", self.core_out)))
305
+ if "r4" in self.layer_names:
306
+ self.handles.append(midas.scratch.refinenet4.register_forward_hook(
307
+ get_activation("r4", self.core_out)))
308
+ if "r3" in self.layer_names:
309
+ self.handles.append(midas.scratch.refinenet3.register_forward_hook(
310
+ get_activation("r3", self.core_out)))
311
+ if "r2" in self.layer_names:
312
+ self.handles.append(midas.scratch.refinenet2.register_forward_hook(
313
+ get_activation("r2", self.core_out)))
314
+ if "r1" in self.layer_names:
315
+ self.handles.append(midas.scratch.refinenet1.register_forward_hook(
316
+ get_activation("r1", self.core_out)))
317
+ if "l4_rn" in self.layer_names:
318
+ self.handles.append(midas.scratch.layer4_rn.register_forward_hook(
319
+ get_activation("l4_rn", self.core_out)))
320
+
321
+ return self
322
+
323
+ def remove_hooks(self):
324
+ for h in self.handles:
325
+ h.remove()
326
+ return self
327
+
328
+ def __del__(self):
329
+ self.remove_hooks()
330
+
331
+ def set_output_channels(self, model_type):
332
+ self.output_channels = MIDAS_SETTINGS[model_type]
333
+
334
+ @staticmethod
335
+ def build(midas_model_type="DPT_BEiT_L_384", train_midas=False, use_pretrained_midas=True, fetch_features=False, freeze_bn=True, force_keep_ar=False, force_reload=False, **kwargs):
336
+ if midas_model_type not in MIDAS_SETTINGS:
337
+ raise ValueError(
338
+ f"Invalid model type: {midas_model_type}. Must be one of {list(MIDAS_SETTINGS.keys())}")
339
+ if "img_size" in kwargs:
340
+ kwargs = MidasCore.parse_img_size(kwargs)
341
+ img_size = kwargs.pop("img_size", [384, 384])
342
+ # print("img_size", img_size)
343
+ midas = torch.hub.load("intel-isl/MiDaS", midas_model_type,
344
+ pretrained=use_pretrained_midas, force_reload=force_reload)
345
+ kwargs.update({'keep_aspect_ratio': force_keep_ar})
346
+ midas_core = MidasCore(midas, trainable=train_midas, fetch_features=fetch_features,
347
+ freeze_bn=freeze_bn, img_size=img_size, **kwargs)
348
+ midas_core.set_output_channels(midas_model_type)
349
+ return midas_core
350
+
351
+ @staticmethod
352
+ def build_from_config(config):
353
+ return MidasCore.build(**config)
354
+
355
+ @staticmethod
356
+ def parse_img_size(config):
357
+ assert 'img_size' in config
358
+ if isinstance(config['img_size'], str):
359
+ assert "," in config['img_size'], "img_size should be a string with comma separated img_size=H,W"
360
+ config['img_size'] = list(map(int, config['img_size'].split(",")))
361
+ assert len(
362
+ config['img_size']) == 2, "img_size should be a string with comma separated img_size=H,W"
363
+ elif isinstance(config['img_size'], int):
364
+ config['img_size'] = [config['img_size'], config['img_size']]
365
+ else:
366
+ assert isinstance(config['img_size'], list) and len(
367
+ config['img_size']) == 2, "img_size should be a list of H,W"
368
+ return config
369
+
370
+
371
+ nchannels2models = {
372
+ tuple([256]*5): ["DPT_BEiT_L_384", "DPT_BEiT_L_512", "DPT_BEiT_B_384", "DPT_SwinV2_L_384", "DPT_SwinV2_B_384", "DPT_SwinV2_T_256", "DPT_Large", "DPT_Hybrid"],
373
+ (512, 256, 128, 64, 64): ["MiDaS_small"]
374
+ }
375
+
376
+ # Model name to number of output channels
377
+ MIDAS_SETTINGS = {m: k for k, v in nchannels2models.items()
378
+ for m in v
379
+ }
380
+ # print('MIDAS_SETTINGS:', MIDAS_SETTINGS)
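Usage note (editorial, not part of the uploaded file): a sketch of building the MidasCore feature extractor above. MidasCore.build fetches MiDaS weights through torch.hub, so this needs network access on first run; output shapes depend on the chosen backbone, hence they are printed rather than asserted.

import torch

core = MidasCore.build(midas_model_type="DPT_BEiT_L_384", train_midas=False,
                       use_pretrained_midas=True, fetch_features=True,
                       freeze_bn=True, img_size=384)
x = torch.rand(1, 3, 384, 384)
rel_depth, feats = core(x, return_rel_depth=True)
print(rel_depth.shape, [f.shape for f in feats])  # relative depth + hooked decoder features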
metric_depth/zoedepth/models/builder.py ADDED
@@ -0,0 +1,51 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from importlib import import_module
26
+ from zoedepth.models.depth_model import DepthModel
27
+
28
+ def build_model(config) -> DepthModel:
29
+ """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface.
30
+ This function should be used to construct models for training and evaluation.
31
+
32
+ Args:
33
+ config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder.
34
+
35
+ Returns:
36
+ torch.nn.Module: Model corresponding to name and version as specified in config
37
+ """
38
+ module_name = f"zoedepth.models.{config.model}"
39
+ try:
40
+ module = import_module(module_name)
41
+ except ModuleNotFoundError as e:
42
+ # print the original error message
43
+ print(e)
44
+ raise ValueError(
45
+ f"Model {config.model} not found. Refer above error for details.") from e
46
+ try:
47
+ get_version = getattr(module, "get_version")
48
+ except AttributeError as e:
49
+ raise ValueError(
50
+ f"Model {config.model} has no get_version function.") from e
51
+ return get_version(config.version_name).build_from_config(config)
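Usage note (editorial, not part of the uploaded file): build_model is normally driven by a config object carrying .model and .version_name. The sketch below assumes that utils/config.py (referenced in the docstring) exposes get_config as in the upstream ZoeDepth repo and that a "zoedepth" model folder providing get_version is present; both are assumptions, not guarantees about this upload.

from zoedepth.utils.config import get_config
from zoedepth.models.builder import build_model

config = get_config("zoedepth", "infer")  # model name + mode
model = build_model(config)               # import_module -> get_version(...).build_from_config(config)
print(type(model).__name__)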
metric_depth/zoedepth/models/depth_model.py ADDED
@@ -0,0 +1,152 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import numpy as np
26
+ import torch
27
+ import torch.nn as nn
28
+ import torch.nn.functional as F
29
+ from torchvision import transforms
30
+ import PIL.Image
31
+ from PIL import Image
32
+ from typing import Union
33
+
34
+
35
+ class DepthModel(nn.Module):
36
+ def __init__(self):
37
+ super().__init__()
38
+ self.device = 'cpu'
39
+
40
+ def to(self, device) -> nn.Module:
41
+ self.device = device
42
+ return super().to(device)
43
+
44
+ def forward(self, x, *args, **kwargs):
45
+ raise NotImplementedError
46
+
47
+ def _infer(self, x: torch.Tensor):
48
+ """
49
+ Inference interface for the model
50
+ Args:
51
+ x (torch.Tensor): input tensor of shape (b, c, h, w)
52
+ Returns:
53
+ torch.Tensor: output tensor of shape (b, 1, h, w)
54
+ """
55
+ return self(x)['metric_depth']
56
+
57
+ def _infer_with_pad_aug(self, x: torch.Tensor, pad_input: bool=True, fh: float=3, fw: float=3, upsampling_mode: str='bicubic', padding_mode="reflect", **kwargs) -> torch.Tensor:
58
+ """
59
+ Inference interface for the model with padding augmentation
60
+ Padding augmentation fixes the boundary artifacts in the output depth map.
61
+ Boundary artifacts sometimes arise because the model is trained on the NYU raw dataset, which has a black or white border around the image.
62
+ This augmentation pads the input image and crops the prediction back to the original size / view.
63
+
64
+ Note: This augmentation is not required for the models trained with 'avoid_boundary'=True.
65
+ Args:
66
+ x (torch.Tensor): input tensor of shape (b, c, h, w)
67
+ pad_input (bool, optional): whether to pad the input or not. Defaults to True.
68
+ fh (float, optional): height padding factor. The padding is calculated as sqrt(h/2) * fh. Defaults to 3.
69
+ fw (float, optional): width padding factor. The padding is calculated as sqrt(w/2) * fw. Defaults to 3.
70
+ upsampling_mode (str, optional): upsampling mode. Defaults to 'bicubic'.
71
+ padding_mode (str, optional): padding mode. Defaults to "reflect".
72
+ Returns:
73
+ torch.Tensor: output tensor of shape (b, 1, h, w)
74
+ """
75
+ # assert x is nchw and c = 3
76
+ assert x.dim() == 4, "x must be 4 dimensional, got {}".format(x.dim())
77
+ assert x.shape[1] == 3, "x must have 3 channels, got {}".format(x.shape[1])
78
+
79
+ if pad_input:
80
+ assert fh > 0 or fw > 0, "at least one of fh and fw must be greater than 0"
81
+ pad_h = int(np.sqrt(x.shape[2]/2) * fh)
82
+ pad_w = int(np.sqrt(x.shape[3]/2) * fw)
83
+ padding = [pad_w, pad_w]
84
+ if pad_h > 0:
85
+ padding += [pad_h, pad_h]
86
+
87
+ x = F.pad(x, padding, mode=padding_mode, **kwargs)
88
+ out = self._infer(x)
89
+ if out.shape[-2:] != x.shape[-2:]:
90
+ out = F.interpolate(out, size=(x.shape[2], x.shape[3]), mode=upsampling_mode, align_corners=False)
91
+ if pad_input:
92
+ # crop to the original size, handling the case where pad_h and pad_w is 0
93
+ if pad_h > 0:
94
+ out = out[:, :, pad_h:-pad_h,:]
95
+ if pad_w > 0:
96
+ out = out[:, :, :, pad_w:-pad_w]
97
+ return out
98
+
99
+ def infer_with_flip_aug(self, x, pad_input: bool=True, **kwargs) -> torch.Tensor:
100
+ """
101
+ Inference interface for the model with horizontal flip augmentation
102
+ Horizontal flip augmentation improves the accuracy of the model by averaging the output of the model with and without horizontal flip.
103
+ Args:
104
+ x (torch.Tensor): input tensor of shape (b, c, h, w)
105
+ pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
106
+ Returns:
107
+ torch.Tensor: output tensor of shape (b, 1, h, w)
108
+ """
109
+ # infer with horizontal flip and average
110
+ out = self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
111
+ out_flip = self._infer_with_pad_aug(torch.flip(x, dims=[3]), pad_input=pad_input, **kwargs)
112
+ out = (out + torch.flip(out_flip, dims=[3])) / 2
113
+ return out
114
+
115
+ def infer(self, x, pad_input: bool=True, with_flip_aug: bool=True, **kwargs) -> torch.Tensor:
116
+ """
117
+ Inference interface for the model
118
+ Args:
119
+ x (torch.Tensor): input tensor of shape (b, c, h, w)
120
+ pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
121
+ with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
122
+ Returns:
123
+ torch.Tensor: output tensor of shape (b, 1, h, w)
124
+ """
125
+ if with_flip_aug:
126
+ return self.infer_with_flip_aug(x, pad_input=pad_input, **kwargs)
127
+ else:
128
+ return self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
129
+
130
+ @torch.no_grad()
131
+ def infer_pil(self, pil_img, pad_input: bool=True, with_flip_aug: bool=True, output_type: str="numpy", **kwargs) -> Union[np.ndarray, PIL.Image.Image, torch.Tensor]:
132
+ """
133
+ Inference interface for the model for PIL image
134
+ Args:
135
+ pil_img (PIL.Image.Image): input PIL image
136
+ pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
137
+ with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
138
+ output_type (str, optional): output type. Supported values are 'numpy', 'pil' and 'tensor'. Defaults to "numpy".
139
+ """
140
+ x = transforms.ToTensor()(pil_img).unsqueeze(0).to(self.device)
141
+ out_tensor = self.infer(x, pad_input=pad_input, with_flip_aug=with_flip_aug, **kwargs)
142
+ if output_type == "numpy":
143
+ return out_tensor.squeeze().cpu().numpy()
144
+ elif output_type == "pil":
145
+ # uint16 is required for depth pil image
146
+ out_16bit_numpy = (out_tensor.squeeze().cpu().numpy()*256).astype(np.uint16)
147
+ return Image.fromarray(out_16bit_numpy)
148
+ elif output_type == "tensor":
149
+ return out_tensor.squeeze().cpu()
150
+ else:
151
+ raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'")
152
+
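DepthModel wires test-time augmentation into a single call chain: infer() -> infer_with_flip_aug() -> _infer_with_pad_aug() -> _infer(), where the reflect padding is int(sqrt(h/2) * fh) and int(sqrt(w/2) * fw) pixels and is cropped off again after prediction. A minimal sketch of the PIL entry point, assuming `model` is any DepthModel subclass already built and moved to a device, and "example.jpg" is a placeholder path:

    from PIL import Image

    img = Image.open("example.jpg").convert("RGB")

    depth_np  = model.infer_pil(img)                          # numpy array, default TTA (pad + flip)
    depth_pil = model.infer_pil(img, output_type="pil")       # uint16 PIL image, values = depth * 256
    depth_t   = model.infer_pil(img, pad_input=False,         # raw single forward pass
                                with_flip_aug=False,
                                output_type="tensor")

Note that infer_pil only applies transforms.ToTensor(), so the network receives values in [0, 1]; any further normalization is left to the concrete model.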
metric_depth/zoedepth/models/layers/attractor.py ADDED
@@ -0,0 +1,208 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
+ @torch.jit.script
30
+ def exp_attractor(dx, alpha: float = 300, gamma: int = 2):
31
+ """Exponential attractor: dc = exp(-alpha*|dx|^gamma) * dx , where dx = a - c, a = attractor point, c = bin center, dc = shift in bin centermmary for exp_attractor
32
+
33
+ Args:
34
+ dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
35
+ alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
36
+ gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.
37
+
38
+ Returns:
39
+ torch.Tensor : Delta shifts - dc; New bin centers = Old bin centers + dc
40
+ """
41
+ return torch.exp(-alpha*(torch.abs(dx)**gamma)) * (dx)
42
+
43
+
44
+ @torch.jit.script
45
+ def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
46
+ """Inverse attractor: dc = dx / (1 + alpha*dx^gamma), where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center
47
+ This is the default one according to the accompanying paper.
48
+
49
+ Args:
50
+ dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
51
+ alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
52
+ gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.
53
+
54
+ Returns:
55
+ torch.Tensor: Delta shifts - dc; New bin centers = Old bin centers + dc
56
+ """
57
+ return dx.div(1+alpha*dx.pow(gamma))
58
+
59
+
60
+ class AttractorLayer(nn.Module):
61
+ def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
62
+ alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
63
+ """
64
+ Attractor layer for bin centers. Bin centers are bounded on the interval (min_depth, max_depth)
65
+ """
66
+ super().__init__()
67
+
68
+ self.n_attractors = n_attractors
69
+ self.n_bins = n_bins
70
+ self.min_depth = min_depth
71
+ self.max_depth = max_depth
72
+ self.alpha = alpha
73
+ self.gamma = gamma
74
+ self.kind = kind
75
+ self.attractor_type = attractor_type
76
+ self.memory_efficient = memory_efficient
77
+
78
+ self._net = nn.Sequential(
79
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
80
+ nn.ReLU(inplace=True),
81
+ nn.Conv2d(mlp_dim, n_attractors*2, 1, 1, 0), # x2 for linear norm
82
+ nn.ReLU(inplace=True)
83
+ )
84
+
85
+ def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
86
+ """
87
+ Args:
88
+ x (torch.Tensor) : feature block; shape - n, c, h, w
89
+ b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
90
+
91
+ Returns:
92
+ tuple(torch.Tensor,torch.Tensor) : new bin centers normed and scaled; shape - n, nbins, h, w
93
+ """
94
+ if prev_b_embedding is not None:
95
+ if interpolate:
96
+ prev_b_embedding = nn.functional.interpolate(
97
+ prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
98
+ x = x + prev_b_embedding
99
+
100
+ A = self._net(x)
101
+ eps = 1e-3
102
+ A = A + eps
103
+ n, c, h, w = A.shape
104
+ A = A.view(n, self.n_attractors, 2, h, w)
105
+ A_normed = A / A.sum(dim=2, keepdim=True) # n, a, 2, h, w
106
+ A_normed = A[:, :, 0, ...] # n, na, h, w
107
+
108
+ b_prev = nn.functional.interpolate(
109
+ b_prev, (h, w), mode='bilinear', align_corners=True)
110
+ b_centers = b_prev
111
+
112
+ if self.attractor_type == 'exp':
113
+ dist = exp_attractor
114
+ else:
115
+ dist = inv_attractor
116
+
117
+ if not self.memory_efficient:
118
+ func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
119
+ # .shape N, nbins, h, w
120
+ delta_c = func(dist(A_normed.unsqueeze(
121
+ 2) - b_centers.unsqueeze(1)), dim=1)
122
+ else:
123
+ delta_c = torch.zeros_like(b_centers, device=b_centers.device)
124
+ for i in range(self.n_attractors):
125
+ # .shape N, nbins, h, w
126
+ delta_c += dist(A_normed[:, i, ...].unsqueeze(1) - b_centers)
127
+
128
+ if self.kind == 'mean':
129
+ delta_c = delta_c / self.n_attractors
130
+
131
+ b_new_centers = b_centers + delta_c
132
+ B_centers = (self.max_depth - self.min_depth) * \
133
+ b_new_centers + self.min_depth
134
+ B_centers, _ = torch.sort(B_centers, dim=1)
135
+ B_centers = torch.clip(B_centers, self.min_depth, self.max_depth)
136
+ return b_new_centers, B_centers
137
+
138
+
139
+ class AttractorLayerUnnormed(nn.Module):
140
+ def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
141
+ alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
142
+ """
143
+ Attractor layer for bin centers. Bin centers are unbounded
144
+ """
145
+ super().__init__()
146
+
147
+ self.n_attractors = n_attractors
148
+ self.n_bins = n_bins
149
+ self.min_depth = min_depth
150
+ self.max_depth = max_depth
151
+ self.alpha = alpha
152
+ self.gamma = gamma
153
+ self.kind = kind
154
+ self.attractor_type = attractor_type
155
+ self.memory_efficient = memory_efficient
156
+
157
+ self._net = nn.Sequential(
158
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
159
+ nn.ReLU(inplace=True),
160
+ nn.Conv2d(mlp_dim, n_attractors, 1, 1, 0),
161
+ nn.Softplus()
162
+ )
163
+
164
+ def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
165
+ """
166
+ Args:
167
+ x (torch.Tensor) : feature block; shape - n, c, h, w
168
+ b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
169
+
170
+ Returns:
171
+ tuple(torch.Tensor,torch.Tensor) : new bin centers unbounded; shape - n, nbins, h, w. Two outputs just to keep the API consistent with the normed version
172
+ """
173
+ if prev_b_embedding is not None:
174
+ if interpolate:
175
+ prev_b_embedding = nn.functional.interpolate(
176
+ prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
177
+ x = x + prev_b_embedding
178
+
179
+ A = self._net(x)
180
+ n, c, h, w = A.shape
181
+
182
+ b_prev = nn.functional.interpolate(
183
+ b_prev, (h, w), mode='bilinear', align_corners=True)
184
+ b_centers = b_prev
185
+
186
+ if self.attractor_type == 'exp':
187
+ dist = exp_attractor
188
+ else:
189
+ dist = inv_attractor
190
+
191
+ if not self.memory_efficient:
192
+ func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
193
+ # .shape N, nbins, h, w
194
+ delta_c = func(
195
+ dist(A.unsqueeze(2) - b_centers.unsqueeze(1)), dim=1)
196
+ else:
197
+ delta_c = torch.zeros_like(b_centers, device=b_centers.device)
198
+ for i in range(self.n_attractors):
199
+ delta_c += dist(A[:, i, ...].unsqueeze(1) -
200
+ b_centers) # .shape N, nbins, h, w
201
+
202
+ if self.kind == 'mean':
203
+ delta_c = delta_c / self.n_attractors
204
+
205
+ b_new_centers = b_centers + delta_c
206
+ B_centers = b_new_centers
207
+
208
+ return b_new_centers, B_centers
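exp_attractor and inv_attractor implement dc = exp(-alpha*|dx|^gamma) * dx and dc = dx / (1 + alpha*dx^gamma); both damp large differences while letting small ones pass nearly unchanged, so bin centers are pulled toward nearby attractor points. A minimal sketch on toy tensors (shapes and channel counts are illustrative only):

    import torch
    from zoedepth.models.layers.attractor import inv_attractor, AttractorLayerUnnormed

    dx = torch.tensor([[-0.2], [0.01], [0.3]])     # attractor - bin_center differences
    print(inv_attractor(dx))                        # |dx|=0.01 passes almost unchanged, |dx|=0.3 is damped

    # One refinement step: 8 previous bin centers over a 2x32x4x4 feature block.
    layer = AttractorLayerUnnormed(in_features=32, n_bins=8, n_attractors=16)
    x = torch.rand(2, 32, 4, 4)                     # n, c, h, w feature block
    b_prev = torch.rand(2, 8, 4, 4)                 # previous bin centers
    b_new, B_centers = layer(x, b_prev)             # both (2, 8, 4, 4); identical for the unnormed variant

The bounded AttractorLayer additionally sorts and clips the scaled centers to (min_depth, max_depth), which the unnormed variant skips.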
metric_depth/zoedepth/models/layers/dist_layers.py ADDED
@@ -0,0 +1,121 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
+ def log_binom(n, k, eps=1e-7):
30
+ """ log(nCk) using stirling approximation """
31
+ n = n + eps
32
+ k = k + eps
33
+ return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps)
34
+
35
+
36
+ class LogBinomial(nn.Module):
37
+ def __init__(self, n_classes=256, act=torch.softmax):
38
+ """Compute log binomial distribution for n_classes
39
+
40
+ Args:
41
+ n_classes (int, optional): number of output classes. Defaults to 256.
42
+ """
43
+ super().__init__()
44
+ self.K = n_classes
45
+ self.act = act
46
+ self.register_buffer('k_idx', torch.arange(
47
+ 0, n_classes).view(1, -1, 1, 1))
48
+ self.register_buffer('K_minus_1', torch.Tensor(
49
+ [self.K-1]).view(1, -1, 1, 1))
50
+
51
+ def forward(self, x, t=1., eps=1e-4):
52
+ """Compute log binomial distribution for x
53
+
54
+ Args:
55
+ x (torch.Tensor - NCHW): probabilities
56
+ t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1..
57
+ eps (float, optional): Small number for numerical stability. Defaults to 1e-4.
58
+
59
+ Returns:
60
+ torch.Tensor -NCHW: log binomial distribution logbinomial(p;t)
61
+ """
62
+ if x.ndim == 3:
63
+ x = x.unsqueeze(1) # make it nchw
64
+
65
+ one_minus_x = torch.clamp(1 - x, eps, 1)
66
+ x = torch.clamp(x, eps, 1)
67
+ y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \
68
+ torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x)
69
+ return self.act(y/t, dim=1)
70
+
71
+
72
+ class ConditionalLogBinomial(nn.Module):
73
+ def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax):
74
+ """Conditional Log Binomial distribution
75
+
76
+ Args:
77
+ in_features (int): number of input channels in main feature
78
+ condition_dim (int): number of input channels in condition feature
79
+ n_classes (int, optional): Number of classes. Defaults to 256.
80
+ bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2.
81
+ p_eps (float, optional): small eps value. Defaults to 1e-4.
82
+ max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50.
83
+ min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7.
84
+ """
85
+ super().__init__()
86
+ self.p_eps = p_eps
87
+ self.max_temp = max_temp
88
+ self.min_temp = min_temp
89
+ self.log_binomial_transform = LogBinomial(n_classes, act=act)
90
+ bottleneck = (in_features + condition_dim) // bottleneck_factor
91
+ self.mlp = nn.Sequential(
92
+ nn.Conv2d(in_features + condition_dim, bottleneck,
93
+ kernel_size=1, stride=1, padding=0),
94
+ nn.GELU(),
95
+ # 2 for p linear norm, 2 for t linear norm
96
+ nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0),
97
+ nn.Softplus()
98
+ )
99
+
100
+ def forward(self, x, cond):
101
+ """Forward pass
102
+
103
+ Args:
104
+ x (torch.Tensor - NCHW): Main feature
105
+ cond (torch.Tensor - NCHW): condition feature
106
+
107
+ Returns:
108
+ torch.Tensor: Output log binomial distribution
109
+ """
110
+ pt = self.mlp(torch.concat((x, cond), dim=1))
111
+ p, t = pt[:, :2, ...], pt[:, 2:, ...]
112
+
113
+ p = p + self.p_eps
114
+ p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...])
115
+
116
+ t = t + self.p_eps
117
+ t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...])
118
+ t = t.unsqueeze(1)
119
+ t = (self.max_temp - self.min_temp) * t + self.min_temp
120
+
121
+ return self.log_binomial_transform(p, t)
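LogBinomial turns a per-pixel probability p and temperature t into a distribution over n_classes ordinal bins using the Stirling-based log(nCk), and ConditionalLogBinomial predicts p and t from a feature/condition pair before applying it. A minimal sketch on random inputs (channel sizes are illustrative):

    import torch
    from zoedepth.models.layers.dist_layers import LogBinomial, ConditionalLogBinomial

    probs = torch.rand(2, 1, 8, 8)                               # per-pixel probability map
    dist = LogBinomial(n_classes=64)(probs)                      # (2, 64, 8, 8), softmax over dim 1
    print(torch.allclose(dist.sum(dim=1), torch.ones(2, 8, 8)))  # True: valid distribution per pixel

    head = ConditionalLogBinomial(in_features=32, condition_dim=16, n_classes=64)
    x = torch.rand(2, 32, 8, 8)                                  # main feature
    cond = torch.rand(2, 16, 8, 8)                               # conditioning feature
    out = head(x, cond)                                          # (2, 64, 8, 8) bin probabilities

Both p and t are produced as two-channel linear norms (a / (a + b)), which keeps them in (0, 1) before t is rescaled to [min_temp, max_temp].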
metric_depth/zoedepth/models/layers/localbins_layers.py ADDED
@@ -0,0 +1,169 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
+ class SeedBinRegressor(nn.Module):
30
+ def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
31
+ """Bin center regressor network. Bin centers are bounded on (min_depth, max_depth) interval.
32
+
33
+ Args:
34
+ in_features (int): input channels
35
+ n_bins (int, optional): Number of bin centers. Defaults to 16.
36
+ mlp_dim (int, optional): Hidden dimension. Defaults to 256.
37
+ min_depth (float, optional): Min depth value. Defaults to 1e-3.
38
+ max_depth (float, optional): Max depth value. Defaults to 10.
39
+ """
40
+ super().__init__()
41
+ self.version = "1_1"
42
+ self.min_depth = min_depth
43
+ self.max_depth = max_depth
44
+
45
+ self._net = nn.Sequential(
46
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
47
+ nn.ReLU(inplace=True),
48
+ nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
49
+ nn.ReLU(inplace=True)
50
+ )
51
+
52
+ def forward(self, x):
53
+ """
54
+ Returns tensor of bin_width vectors (centers). One vector b for every pixel
55
+ """
56
+ B = self._net(x)
57
+ eps = 1e-3
58
+ B = B + eps
59
+ B_widths_normed = B / B.sum(dim=1, keepdim=True)
60
+ B_widths = (self.max_depth - self.min_depth) * \
61
+ B_widths_normed # .shape NCHW
62
+ # pad has the form (left, right, top, bottom, front, back)
63
+ B_widths = nn.functional.pad(
64
+ B_widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
65
+ B_edges = torch.cumsum(B_widths, dim=1) # .shape NCHW
66
+
67
+ B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:, 1:, ...])
68
+ return B_widths_normed, B_centers
69
+
70
+
71
+ class SeedBinRegressorUnnormed(nn.Module):
72
+ def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
73
+ """Bin center regressor network. Bin centers are unbounded
74
+
75
+ Args:
76
+ in_features (int): input channels
77
+ n_bins (int, optional): Number of bin centers. Defaults to 16.
78
+ mlp_dim (int, optional): Hidden dimension. Defaults to 256.
79
+ min_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
80
+ max_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
81
+ """
82
+ super().__init__()
83
+ self.version = "1_1"
84
+ self._net = nn.Sequential(
85
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
86
+ nn.ReLU(inplace=True),
87
+ nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
88
+ nn.Softplus()
89
+ )
90
+
91
+ def forward(self, x):
92
+ """
93
+ Returns tensor of bin_width vectors (centers). One vector b for every pixel
94
+ """
95
+ B_centers = self._net(x)
96
+ return B_centers, B_centers
97
+
98
+
99
+ class Projector(nn.Module):
100
+ def __init__(self, in_features, out_features, mlp_dim=128):
101
+ """Projector MLP
102
+
103
+ Args:
104
+ in_features (int): input channels
105
+ out_features (int): output channels
106
+ mlp_dim (int, optional): hidden dimension. Defaults to 128.
107
+ """
108
+ super().__init__()
109
+
110
+ self._net = nn.Sequential(
111
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
112
+ nn.ReLU(inplace=True),
113
+ nn.Conv2d(mlp_dim, out_features, 1, 1, 0),
114
+ )
115
+
116
+ def forward(self, x):
117
+ return self._net(x)
118
+
119
+
120
+
121
+ class LinearSplitter(nn.Module):
122
+ def __init__(self, in_features, prev_nbins, split_factor=2, mlp_dim=128, min_depth=1e-3, max_depth=10):
123
+ super().__init__()
124
+
125
+ self.prev_nbins = prev_nbins
126
+ self.split_factor = split_factor
127
+ self.min_depth = min_depth
128
+ self.max_depth = max_depth
129
+
130
+ self._net = nn.Sequential(
131
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
132
+ nn.GELU(),
133
+ nn.Conv2d(mlp_dim, prev_nbins * split_factor, 1, 1, 0),
134
+ nn.ReLU()
135
+ )
136
+
137
+ def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
138
+ """
139
+ x : feature block; shape - n, c, h, w
140
+ b_prev : previous bin widths normed; shape - n, prev_nbins, h, w
141
+ """
142
+ if prev_b_embedding is not None:
143
+ if interpolate:
144
+ prev_b_embedding = nn.functional.interpolate(prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
145
+ x = x + prev_b_embedding
146
+ S = self._net(x)
147
+ eps = 1e-3
148
+ S = S + eps
149
+ n, c, h, w = S.shape
150
+ S = S.view(n, self.prev_nbins, self.split_factor, h, w)
151
+ S_normed = S / S.sum(dim=2, keepdim=True) # fractional splits
152
+
153
+ b_prev = nn.functional.interpolate(b_prev, (h,w), mode='bilinear', align_corners=True)
154
+
155
+
156
+ b_prev = b_prev / b_prev.sum(dim=1, keepdim=True) # renormalize for guarantees
157
+ # print(b_prev.shape, S_normed.shape)
158
+ # if is_for_query:(1).expand(-1, b_prev.size(0)//n, -1, -1, -1, -1).flatten(0,1) # TODO ? can replace all this with a single torch.repeat?
159
+ b = b_prev.unsqueeze(2) * S_normed
160
+ b = b.flatten(1,2) # .shape n, prev_nbins * split_factor, h, w
161
+
162
+ # calculate bin centers for loss calculation
163
+ B_widths = (self.max_depth - self.min_depth) * b # .shape N, nprev * splitfactor, H, W
164
+ # pad has the form (left, right, top, bottom, front, back)
165
+ B_widths = nn.functional.pad(B_widths, (0,0,0,0,1,0), mode='constant', value=self.min_depth)
166
+ B_edges = torch.cumsum(B_widths, dim=1) # .shape NCHW
167
+
168
+ B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:,1:,...])
169
+ return b, B_centers
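SeedBinRegressor maps a feature block to normalized bin widths that partition (min_depth, max_depth) and returns both the widths and the resulting bin centers; Projector is a 1x1-conv MLP; LinearSplitter subdivides each previous bin into split_factor children. A minimal sketch (channel counts illustrative):

    import torch
    from zoedepth.models.layers.localbins_layers import SeedBinRegressor, LinearSplitter

    x = torch.rand(2, 32, 6, 6)                                            # n, c, h, w
    seed = SeedBinRegressor(in_features=32, n_bins=16, min_depth=1e-3, max_depth=10)
    widths_normed, centers = seed(x)                                       # both (2, 16, 6, 6)
    print(torch.allclose(widths_normed.sum(dim=1), torch.ones(2, 6, 6)))   # widths sum to 1 per pixel
    print(centers.min().item() > 1e-3, centers.max().item() < 10)          # centers stay inside the depth range

    splitter = LinearSplitter(in_features=32, prev_nbins=16, split_factor=2)
    b, B_centers = splitter(x, widths_normed)                              # (2, 32, 6, 6): every bin split in two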
metric_depth/zoedepth/models/layers/patch_transformer.py ADDED
@@ -0,0 +1,91 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
+ class PatchTransformerEncoder(nn.Module):
30
+ def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False):
31
+ """ViT-like transformer block
32
+
33
+ Args:
34
+ in_channels (int): Input channels
35
+ patch_size (int, optional): patch size. Defaults to 10.
36
+ embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128.
37
+ num_heads (int, optional): number of attention heads. Defaults to 4.
38
+ use_class_token (bool, optional): Whether to prepend an extra token for global accumulation (the "class token"). Defaults to False.
39
+ """
40
+ super(PatchTransformerEncoder, self).__init__()
41
+ self.use_class_token = use_class_token
42
+ encoder_layers = nn.TransformerEncoderLayer(
43
+ embedding_dim, num_heads, dim_feedforward=1024)
44
+ self.transformer_encoder = nn.TransformerEncoder(
45
+ encoder_layers, num_layers=4) # takes shape S,N,E
46
+
47
+ self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim,
48
+ kernel_size=patch_size, stride=patch_size, padding=0)
49
+
50
+ def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'):
51
+ """Generate positional encodings
52
+
53
+ Args:
54
+ sequence_length (int): Sequence length
55
+ embedding_dim (int): Embedding dimension
56
+
57
+ Returns:
58
+ torch.Tensor SBE: Positional encodings
59
+ """
60
+ position = torch.arange(
61
+ 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1)
62
+ index = torch.arange(
63
+ 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0)
64
+ div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim))
65
+ pos_encoding = position * div_term
66
+ pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1)
67
+ pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1)
68
+ return pos_encoding
69
+
70
+
71
+ def forward(self, x):
72
+ """Forward pass
73
+
74
+ Args:
75
+ x (torch.Tensor - NCHW): Input feature tensor
76
+
77
+ Returns:
78
+ torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim
79
+ """
80
+ embeddings = self.embedding_convPxP(x).flatten(
81
+ 2) # .shape = n,c,s = n, embedding_dim, s
82
+ if self.use_class_token:
83
+ # extra special token at start ?
84
+ embeddings = nn.functional.pad(embeddings, (1, 0))
85
+
86
+ # change to S,N,E format required by transformer
87
+ embeddings = embeddings.permute(2, 0, 1)
88
+ S, N, E = embeddings.shape
89
+ embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device)
90
+ x = self.transformer_encoder(embeddings) # .shape = S, N, E
91
+ return x
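PatchTransformerEncoder patchifies an NCHW feature map with a strided convolution (one token per patch_size x patch_size patch), adds fixed 1-D sinusoidal positional encodings, and runs a 4-layer nn.TransformerEncoder in S, N, E layout. A minimal sketch (sizes illustrative; H and W should be multiples of patch_size so no pixels are dropped):

    import torch
    from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder

    enc = PatchTransformerEncoder(in_channels=32, patch_size=10, embedding_dim=128, num_heads=4)
    x = torch.rand(2, 32, 40, 30)          # n, c, h, w
    out = enc(x)                           # tokens = (40 // 10) * (30 // 10) = 12
    print(out.shape)                       # torch.Size([12, 2, 128])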
metric_depth/zoedepth/models/model_io.py ADDED
@@ -0,0 +1,92 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+
27
+ def load_state_dict(model, state_dict):
28
+ """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict.
29
+
30
+ DataParallel prefixes state_dict keys with 'module.' when saving.
31
+ If the model is not a DataParallel model but the state_dict is, then prefixes are removed.
32
+ If the model is a DataParallel model but the state_dict is not, then prefixes are added.
33
+ """
34
+ state_dict = state_dict.get('model', state_dict)
35
+ # if model is a DataParallel model, then state_dict keys are prefixed with 'module.'
36
+
37
+ do_prefix = isinstance(
38
+ model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel))
39
+ state = {}
40
+ for k, v in state_dict.items():
41
+ if k.startswith('module.') and not do_prefix:
42
+ k = k[7:]
43
+
44
+ if not k.startswith('module.') and do_prefix:
45
+ k = 'module.' + k
46
+
47
+ state[k] = v
48
+
49
+ model.load_state_dict(state)
50
+ print("Loaded successfully")
51
+ return model
52
+
53
+
54
+ def load_wts(model, checkpoint_path):
55
+ ckpt = torch.load(checkpoint_path, map_location='cpu')
56
+ return load_state_dict(model, ckpt)
57
+
58
+
59
+ def load_state_dict_from_url(model, url, **kwargs):
60
+ state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
61
+ return load_state_dict(model, state_dict)
62
+
63
+
64
+ def load_state_from_resource(model, resource: str):
65
+ """Loads weights to the model from a given resource. A resource can be of following types:
66
+ 1. URL. Prefixed with "url::"
67
+ e.g. url::http(s)://url.resource.com/ckpt.pt
68
+
69
+ 2. Local path. Prefixed with "local::"
70
+ e.g. local::/path/to/ckpt.pt
71
+
72
+
73
+ Args:
74
+ model (torch.nn.Module): Model
75
+ resource (str): resource string
76
+
77
+ Returns:
78
+ torch.nn.Module: Model with loaded weights
79
+ """
80
+ print(f"Using pretrained resource {resource}")
81
+
82
+ if resource.startswith('url::'):
83
+ url = resource.split('url::')[1]
84
+ return load_state_dict_from_url(model, url, progress=True)
85
+
86
+ elif resource.startswith('local::'):
87
+ path = resource.split('local::')[1]
88
+ return load_wts(model, path)
89
+
90
+ else:
91
+ raise ValueError("Invalid resource type, only url:: and local:: are supported")
92
+
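load_state_from_resource dispatches on a "url::" or "local::" prefix, while load_state_dict reconciles the 'module.' prefix between (Distributed)DataParallel checkpoints and plain modules, so either kind of checkpoint loads into either kind of model. A minimal sketch, with placeholder paths/URLs and `model` built elsewhere (e.g. via build_model):

    from zoedepth.models.model_io import load_state_from_resource

    model = load_state_from_resource(model, "local::/path/to/ckpt.pt")
    # or, for a hosted checkpoint:
    # model = load_state_from_resource(model, "url::https://example.com/ckpt.pt")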