Spaces: Runtime error
Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes. See raw diff.
- LICENSE +201 -0
- README.md +278 -8
- app.py +95 -0
- controlnet/README.md +15 -0
- controlnet/config.json +51 -0
- depth_anything/__pycache__/blocks.cpython-39.pyc +0 -0
- depth_anything/__pycache__/dpt.cpython-39.pyc +0 -0
- depth_anything/blocks.py +153 -0
- depth_anything/dpt.py +187 -0
- depth_anything/util/__pycache__/transform.cpython-39.pyc +0 -0
- depth_anything/util/transform.py +248 -0
- depthanything_server.py +61 -0
- gallery.md +160 -0
- metric_depth/README.md +89 -0
- metric_depth/depth_to_pointcloud.py +79 -0
- metric_depth/environment.yml +26 -0
- metric_depth/evaluate.py +160 -0
- metric_depth/point_cloud_on_trackbar.py +168 -0
- metric_depth/train_mix.py +182 -0
- metric_depth/train_mono.py +176 -0
- metric_depth/train_test_inputs/kitti_eigen_test_files_with_gt.txt +0 -0
- metric_depth/train_test_inputs/kitti_eigen_train_files_with_gt.txt +0 -0
- metric_depth/train_test_inputs/nyudepthv2_test_files_with_gt.txt +654 -0
- metric_depth/train_test_inputs/nyudepthv2_train_files_with_gt.txt +0 -0
- metric_depth/zoedepth/data/__init__.py +24 -0
- metric_depth/zoedepth/data/data_mono.py +573 -0
- metric_depth/zoedepth/data/ddad.py +125 -0
- metric_depth/zoedepth/data/diml_indoor_test.py +125 -0
- metric_depth/zoedepth/data/diml_outdoor_test.py +114 -0
- metric_depth/zoedepth/data/diode.py +125 -0
- metric_depth/zoedepth/data/hypersim.py +138 -0
- metric_depth/zoedepth/data/ibims.py +81 -0
- metric_depth/zoedepth/data/preprocess.py +154 -0
- metric_depth/zoedepth/data/sun_rgbd_loader.py +115 -0
- metric_depth/zoedepth/data/transforms.py +481 -0
- metric_depth/zoedepth/data/vkitti.py +151 -0
- metric_depth/zoedepth/data/vkitti2.py +187 -0
- metric_depth/zoedepth/models/__init__.py +24 -0
- metric_depth/zoedepth/models/base_models/__init__.py +24 -0
- metric_depth/zoedepth/models/base_models/depth_anything.py +376 -0
- metric_depth/zoedepth/models/base_models/dpt_dinov2/blocks.py +153 -0
- metric_depth/zoedepth/models/base_models/dpt_dinov2/dpt.py +157 -0
- metric_depth/zoedepth/models/base_models/midas.py +380 -0
- metric_depth/zoedepth/models/builder.py +51 -0
- metric_depth/zoedepth/models/depth_model.py +152 -0
- metric_depth/zoedepth/models/layers/attractor.py +208 -0
- metric_depth/zoedepth/models/layers/dist_layers.py +121 -0
- metric_depth/zoedepth/models/layers/localbins_layers.py +169 -0
- metric_depth/zoedepth/models/layers/patch_transformer.py +91 -0
- metric_depth/zoedepth/models/model_io.py +92 -0
LICENSE
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
README.md
CHANGED
@@ -1,12 +1,282 @@
 ---
-title: Sketchpad
-colorFrom: purple
-colorTo: red
+title: Sketchpad-DepthAnything
+app_file: depthanything_server.py
 sdk: gradio
-sdk_version: 4.
-app_file: app.py
-pinned: false
+sdk_version: 4.39.0
 ---

<div align="center">
<h2>Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data</h2>

[**Lihe Yang**](https://liheyoung.github.io/)<sup>1</sup> · [**Bingyi Kang**](https://scholar.google.com/citations?user=NmHgX-wAAAAJ)<sup>2†</sup> · [**Zilong Huang**](http://speedinghzl.github.io/)<sup>2</sup> · [**Xiaogang Xu**](https://xiaogang00.github.io/)<sup>3,4</sup> · [**Jiashi Feng**](https://sites.google.com/site/jshfeng/)<sup>2</sup> · [**Hengshuang Zhao**](https://hszhao.github.io/)<sup>1*</sup>

<sup>1</sup>HKU    <sup>2</sup>TikTok    <sup>3</sup>CUHK    <sup>4</sup>ZJU

†project lead *corresponding author

**CVPR 2024**

<a href="https://arxiv.org/abs/2401.10891"><img src='https://img.shields.io/badge/arXiv-Depth Anything-red' alt='Paper PDF'></a>
<a href='https://depth-anything.github.io'><img src='https://img.shields.io/badge/Project_Page-Depth Anything-green' alt='Project Page'></a>
<a href='https://huggingface.co/spaces/LiheYoung/Depth-Anything'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
<a href='https://huggingface.co/papers/2401.10891'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Paper-yellow'></a>
</div>

This work presents Depth Anything, a highly practical solution for robust monocular depth estimation, trained on a combination of 1.5M labeled images and **62M+ unlabeled images**.

![teaser](assets/teaser.png)

<div align="center">
<a href="https://github.com/DepthAnything/Depth-Anything-V2"><b>Try our latest Depth Anything V2 models!</b></a><br>
</div>

## News

* **2024-06-14:** [Depth Anything V2](https://github.com/DepthAnything/Depth-Anything-V2) is released.
* **2024-02-27:** Depth Anything is accepted by CVPR 2024.
* **2024-02-05:** [Depth Anything Gallery](./gallery.md) is released. Thanks to all the users!
* **2024-02-02:** Depth Anything serves as the default depth processor for [InstantID](https://github.com/InstantID/InstantID) and [InvokeAI](https://github.com/invoke-ai/InvokeAI/releases/tag/v3.6.1).
* **2024-01-25:** Support [video depth visualization](./run_video.py). An [online demo for video](https://huggingface.co/spaces/JohanDL/Depth-Anything-Video) is also available.
* **2024-01-23:** The new ControlNet based on Depth Anything is integrated into [ControlNet WebUI](https://github.com/Mikubill/sd-webui-controlnet) and [ComfyUI's ControlNet](https://github.com/Fannovel16/comfyui_controlnet_aux).
* **2024-01-23:** Depth Anything [ONNX](https://github.com/fabio-sim/Depth-Anything-ONNX) and [TensorRT](https://github.com/spacewalk01/depth-anything-tensorrt) versions are supported.
* **2024-01-22:** Paper, project page, code, models, and demo ([HuggingFace](https://huggingface.co/spaces/LiheYoung/Depth-Anything), [OpenXLab](https://openxlab.org.cn/apps/detail/yyfan/depth_anything)) are released.


## Features of Depth Anything

***If you need other features, please first check [existing community support](#community-support).***

- **Relative depth estimation**:

    Our foundation models listed [here](https://huggingface.co/spaces/LiheYoung/Depth-Anything/tree/main/checkpoints) provide robust relative depth estimation for any given image. Please refer [here](#running) for details.

- **Metric depth estimation**

    We fine-tune our Depth Anything model with metric depth information from NYUv2 or KITTI. It offers strong in-domain and zero-shot metric depth estimation. Please refer [here](./metric_depth) for details.


- **Better depth-conditioned ControlNet**

    We re-train **a better depth-conditioned ControlNet** based on Depth Anything. It offers more precise synthesis than the previous MiDaS-based ControlNet. Please refer [here](./controlnet/) for details. You can also use our new ControlNet based on Depth Anything in [ControlNet WebUI](https://github.com/Mikubill/sd-webui-controlnet) or [ComfyUI's ControlNet](https://github.com/Fannovel16/comfyui_controlnet_aux).

- **Downstream high-level scene understanding**

    The Depth Anything encoder can be fine-tuned for downstream high-level perception tasks, *e.g.*, semantic segmentation, reaching 86.2 mIoU on Cityscapes and 59.4 mIoU on ADE20K. Please refer [here](./semseg/) for details.


## Performance

Here we compare our Depth Anything with the previously best MiDaS v3.1 BEiT<sub>L-512</sub> model.

Please note that the latest MiDaS is also trained on KITTI and NYUv2, while our models are not.

| Method | Params | KITTI || NYUv2 || Sintel || DDAD || ETH3D || DIODE ||
|-|-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
| | | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ |
| MiDaS | 345.0M | 0.127 | 0.850 | 0.048 | *0.980* | 0.587 | 0.699 | 0.251 | 0.766 | 0.139 | 0.867 | 0.075 | 0.942 |
| **Ours-S** | 24.8M | 0.080 | 0.936 | 0.053 | 0.972 | 0.464 | 0.739 | 0.247 | 0.768 | 0.127 | **0.885** | 0.076 | 0.939 |
| **Ours-B** | 97.5M | *0.080* | *0.939* | *0.046* | 0.979 | **0.432** | *0.756* | *0.232* | *0.786* | **0.126** | *0.884* | *0.069* | *0.946* |
| **Ours-L** | 335.3M | **0.076** | **0.947** | **0.043** | **0.981** | *0.458* | **0.760** | **0.230** | **0.789** | *0.127* | 0.882 | **0.066** | **0.952** |

We highlight the **best** and *second best* results in **bold** and *italic* respectively (**better results**: AbsRel $\downarrow$, $\delta_1 \uparrow$).

## Pre-trained models

We provide three models of varying scales for robust relative depth estimation:

| Model | Params | Inference time on V100 (ms) | A100 (ms) | RTX4090 (ms, [TensorRT](https://github.com/spacewalk01/depth-anything-tensorrt)) |
|:-|-:|:-:|:-:|:-:|
| Depth-Anything-Small | 24.8M | 12 | 8 | 3 |
| Depth-Anything-Base | 97.5M | 13 | 9 | 6 |
| Depth-Anything-Large | 335.3M | 20 | 13 | 12 |

Note that the V100 and A100 inference times (*without TensorRT*) exclude the pre-processing and post-processing stages, whereas the RTX4090 times (*with TensorRT*) include these two stages (please refer to [Depth-Anything-TensorRT](https://github.com/spacewalk01/depth-anything-tensorrt)).

You can easily load our pre-trained models:
```python
from depth_anything.dpt import DepthAnything

encoder = 'vits' # can also be 'vitb' or 'vitl'
depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{:}14'.format(encoder))
```

Depth Anything is also supported in [``transformers``](https://github.com/huggingface/transformers). You can use it for depth prediction within [3 lines of code](https://huggingface.co/docs/transformers/main/model_doc/depth_anything) (credit to [@niels](https://huggingface.co/nielsr)).

### *No network connection, cannot load these models?*

<details>
<summary>Click here for solutions</summary>

- First, manually download the three checkpoints: [depth-anything-large](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitl14.pth), [depth-anything-base](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitb14.pth), and [depth-anything-small](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vits14.pth).

- Second, upload the folder containing the checkpoints to your remote server.

- Lastly, load the model locally:
```python
import torch
from depth_anything.dpt import DepthAnything

model_configs = {
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}
}

encoder = 'vitl' # or 'vitb', 'vits'
depth_anything = DepthAnything(model_configs[encoder])
depth_anything.load_state_dict(torch.load(f'./checkpoints/depth_anything_{encoder}14.pth'))
```
Note that when loading locally this way, you do not need the ``huggingface_hub`` package, so feel free to delete this [line](https://github.com/LiheYoung/Depth-Anything/blob/e7ef4b4b7a0afd8a05ce9564f04c1e5b68268516/depth_anything/dpt.py#L5) and the ``PyTorchModelHubMixin`` in this [line](https://github.com/LiheYoung/Depth-Anything/blob/e7ef4b4b7a0afd8a05ce9564f04c1e5b68268516/depth_anything/dpt.py#L169).
</details>


## Usage

### Installation

```bash
git clone https://github.com/LiheYoung/Depth-Anything
cd Depth-Anything
pip install -r requirements.txt
```

### Running

```bash
python run.py --encoder <vits | vitb | vitl> --img-path <img-directory | single-img | txt-file> --outdir <outdir> [--pred-only] [--grayscale]
```
Arguments:
- ``--img-path``: you can either 1) point it to an image directory containing all images of interest, 2) point it to a single image, or 3) point it to a text file listing all image paths.
- ``--pred-only``: save only the predicted depth map. Without it, by default, we visualize the image and its depth map side by side.
- ``--grayscale``: save the depth map in grayscale. Without it, by default, we apply a color palette to the depth map.

For example:
```bash
python run.py --encoder vitl --img-path assets/examples --outdir depth_vis
```

**If you want to use Depth Anything on videos:**
```bash
python run_video.py --encoder vitl --video-path assets/examples_video --outdir video_depth_vis
```

### Gradio demo <a href='https://github.com/gradio-app/gradio'><img src='https://img.shields.io/github/stars/gradio-app/gradio'></a>

To use our gradio demo locally:

```bash
python app.py
```

You can also try our [online demo](https://huggingface.co/spaces/LiheYoung/Depth-Anything).

### Import Depth Anything to your project

If you want to use Depth Anything in your own project, you can simply follow [``run.py``](run.py) to load our models and define data pre-processing.

<details>
<summary>Code snippet (note the difference between our data pre-processing and that of MiDaS)</summary>

```python
from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

import cv2
import torch
from torchvision.transforms import Compose

encoder = 'vits' # can also be 'vitb' or 'vitl'
depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{:}14'.format(encoder)).eval()

transform = Compose([
    Resize(
        width=518,
        height=518,
        resize_target=False,
        keep_aspect_ratio=True,
        ensure_multiple_of=14,
        resize_method='lower_bound',
        image_interpolation_method=cv2.INTER_CUBIC,
    ),
    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    PrepareForNet(),
])

image = cv2.cvtColor(cv2.imread('your image path'), cv2.COLOR_BGR2RGB) / 255.0
image = transform({'image': image})['image']
image = torch.from_numpy(image).unsqueeze(0)

# depth shape: 1xHxW
depth = depth_anything(image)
```
</details>

### Do not want to define image pre-processing or download model definition files?

Easily use Depth Anything through [``transformers``](https://github.com/huggingface/transformers) within 3 lines of code! Please refer to [these instructions](https://huggingface.co/docs/transformers/main/model_doc/depth_anything) (credit to [@niels](https://huggingface.co/nielsr)).

**Note:** If you encounter ``KeyError: 'depth_anything'``, please install the latest [``transformers``](https://github.com/huggingface/transformers) from source:
```bash
pip install git+https://github.com/huggingface/transformers.git
```
<details>
<summary>Click here for a brief demo:</summary>

```python
from transformers import pipeline
from PIL import Image

image = Image.open('Your-image-path')
pipe = pipeline(task="depth-estimation", model="LiheYoung/depth-anything-small-hf")
depth = pipe(image)["depth"]
```
</details>

## Community Support

**We sincerely appreciate all the extensions the community has built on Depth Anything. Thank you very much!**

Here we list the extensions we have found:
- Depth Anything TensorRT:
    - https://github.com/spacewalk01/depth-anything-tensorrt
    - https://github.com/thinvy/DepthAnythingTensorrtDeploy
    - https://github.com/daniel89710/trt-depth-anything
- Depth Anything ONNX: https://github.com/fabio-sim/Depth-Anything-ONNX
- Depth Anything in Transformers.js (3D visualization): https://huggingface.co/spaces/Xenova/depth-anything-web
- Depth Anything for video (online demo): https://huggingface.co/spaces/JohanDL/Depth-Anything-Video
- Depth Anything in ControlNet WebUI: https://github.com/Mikubill/sd-webui-controlnet
- Depth Anything in ComfyUI's ControlNet: https://github.com/Fannovel16/comfyui_controlnet_aux
- Depth Anything in X-AnyLabeling: https://github.com/CVHub520/X-AnyLabeling
- Depth Anything in OpenXLab: https://openxlab.org.cn/apps/detail/yyfan/depth_anything
- Depth Anything in OpenVINO: https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/280-depth-anything
- Depth Anything ROS:
    - https://github.com/scepter914/DepthAnything-ROS
    - https://github.com/polatztrk/depth_anything_ros
- Depth Anything Android:
    - https://github.com/FeiGeChuanShu/ncnn-android-depth_anything
    - https://github.com/shubham0204/Depth-Anything-Android
- Depth Anything in TouchDesigner: https://github.com/olegchomp/TDDepthAnything
- LearnOpenCV research article on Depth Anything: https://learnopencv.com/depth-anything
- Learn more about the DPT architecture we used: https://github.com/heyoeyo/muggled_dpt
- Depth Anything on NVIDIA Jetson Orin: https://github.com/ZhuYaoHui1998/jetson-examples/blob/main/reComputer/scripts/depth-anything


If you have a project that supports or improves (*e.g.*, speeds up) Depth Anything, please feel free to open an issue; we will add it here.


## Acknowledgement

We would like to express our deepest gratitude to [AK(@_akhaliq)](https://twitter.com/_akhaliq) and the awesome HuggingFace team ([@niels](https://huggingface.co/nielsr), [@hysts](https://huggingface.co/hysts), and [@yuvraj](https://huggingface.co/ysharma)) for helping improve the online demo and build the HF models.

We also thank the [MagicEdit](https://magic-edit.github.io/) team for providing some video examples for video depth estimation, and [Tiancheng Shen](https://scholar.google.com/citations?user=iRY1YVoAAAAJ) for evaluating the depth maps with MagicEdit.

## Citation

If you find this project useful, please consider citing:

```bibtex
@inproceedings{depthanything,
  title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data},
  author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
  booktitle={CVPR},
  year={2024}
}
```
app.py
ADDED
@@ -0,0 +1,95 @@
import gradio as gr
import cv2
import numpy as np
import os
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision.transforms import Compose
import tempfile
from gradio_imageslider import ImageSlider

from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 80vh;
}
#img-display-output {
    max-height: 80vh;
}
"""
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()

title = "# Depth Anything"
description = """Official demo for **Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data**.

Please refer to our [paper](https://arxiv.org/abs/2401.10891), [project page](https://depth-anything.github.io), or [github](https://github.com/LiheYoung/Depth-Anything) for more details."""

transform = Compose([
    Resize(
        width=518,
        height=518,
        resize_target=False,
        keep_aspect_ratio=True,
        ensure_multiple_of=14,
        resize_method='lower_bound',
        image_interpolation_method=cv2.INTER_CUBIC,
    ),
    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    PrepareForNet(),
])

@torch.no_grad()
def predict_depth(model, image):
    return model(image)

with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Depth Prediction demo")
    gr.Markdown("You can slide the output to compare the depth prediction with the input image.")

    with gr.Row():
        input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
        depth_image_slider = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0.5)
    raw_file = gr.File(label="16-bit raw depth (can be considered as disparity)")
    submit = gr.Button("Submit")

    def on_submit(image):
        original_image = image.copy()

        h, w = image.shape[:2]

        # match the training pre-processing: scale to [0, 1], resize to a multiple of 14, normalize
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
        image = transform({'image': image})['image']
        image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)

        depth = predict_depth(model, image)
        # resize the prediction back to the input resolution
        depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]

        # save the raw relative depth as a 16-bit PNG for download
        raw_depth = Image.fromarray(depth.cpu().numpy().astype('uint16'))
        tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
        raw_depth.save(tmp.name)

        # normalize to [0, 255] and apply the inferno colormap (converted to RGB) for display
        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
        depth = depth.cpu().numpy().astype(np.uint8)
        colored_depth = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]

        return [(original_image, colored_depth), tmp.name]

    submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, raw_file])

    example_files = os.listdir('assets/examples')
    example_files.sort()
    example_files = [os.path.join('assets/examples', filename) for filename in example_files]
    examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[depth_image_slider, raw_file], fn=on_submit, cache_examples=False)


if __name__ == '__main__':
    demo.queue().launch()
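The callback above exports the raw prediction as a 16-bit PNG. As a rough sketch (the file name below is just an example, nothing the app fixes), reading that file back and renormalizing it could look like this:

```python
# Sketch only: load the 16-bit PNG saved by on_submit() above. The stored values are the
# network's relative, disparity-like output cast to uint16 (larger = closer), so only
# their ordering is meaningful, not their absolute scale.
import numpy as np
from PIL import Image

raw = np.asarray(Image.open("raw_depth.png"), dtype=np.float32)  # example path
rel = (raw - raw.min()) / (raw.max() - raw.min() + 1e-8)         # ~1.0 nearest, ~0.0 farthest
print(rel.shape, rel.min(), rel.max())
```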
controlnet/README.md
ADDED
@@ -0,0 +1,15 @@
## Depth-Conditioned ControlNet based on Depth Anything

We use [Diffusers](https://github.com/huggingface/diffusers/tree/main) to re-train a better depth-conditioned ControlNet based on our Depth Anything.

Please download our [config file](./config.json) and [pre-trained weights](https://huggingface.co/spaces/LiheYoung/Depth-Anything/tree/main/checkpoints_controlnet), then follow the [instructions](https://github.com/huggingface/diffusers/tree/main/examples/controlnet) in Diffusers for inference.

## Depth-to-Image Synthesis

![demo2](../assets/controlnet_demo1.png)
![demo1](../assets/controlnet_demo2.png)


## Video Editing

Please refer to our [project page](https://depth-anything.github.io/). We use [MagicEdit](https://github.com/magic-research/magic-edit) to show demos of video editing based on depth information.
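For orientation, here is a minimal inference sketch along the lines of the Diffusers ControlNet example linked above. The local checkpoint directory and the Stable Diffusion v1.5 base model are assumptions (the config's `cross_attention_dim` of 768 is consistent with SD 1.5), not something this README pins down:

```python
# Minimal sketch, not the official script: assumes the weights from
# `checkpoints_controlnet` were downloaded to ./checkpoints_controlnet
# (next to config.json) and that Stable Diffusion v1.5 is the base model.
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from PIL import Image

controlnet = ControlNetModel.from_pretrained("./checkpoints_controlnet", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
).to("cuda")

# condition on a Depth Anything prediction rendered as a 3-channel image
depth_image = Image.open("depth_vis.png").convert("RGB")  # example path
result = pipe("a cozy reading corner, warm light", image=depth_image, num_inference_steps=30).images[0]
result.save("controlnet_result.png")
```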
controlnet/config.json
ADDED
@@ -0,0 +1,51 @@
{
  "_class_name": "ControlNetModel",
  "_diffusers_version": "0.26.0.dev0",
  "act_fn": "silu",
  "addition_embed_type": null,
  "addition_embed_type_num_heads": 64,
  "addition_time_embed_dim": null,
  "attention_head_dim": 8,
  "block_out_channels": [
    320,
    640,
    1280,
    1280
  ],
  "class_embed_type": null,
  "conditioning_channels": 3,
  "conditioning_embedding_out_channels": [
    16,
    32,
    96,
    256
  ],
  "controlnet_conditioning_channel_order": "rgb",
  "cross_attention_dim": 768,
  "down_block_types": [
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "DownBlock2D"
  ],
  "downsample_padding": 1,
  "encoder_hid_dim": null,
  "encoder_hid_dim_type": null,
  "flip_sin_to_cos": true,
  "freq_shift": 0,
  "global_pool_conditions": false,
  "in_channels": 4,
  "layers_per_block": 2,
  "mid_block_scale_factor": 1,
  "mid_block_type": "UNetMidBlock2DCrossAttn",
  "norm_eps": 1e-05,
  "norm_num_groups": 32,
  "num_attention_heads": null,
  "num_class_embeds": null,
  "only_cross_attention": false,
  "projection_class_embeddings_input_dim": null,
  "resnet_time_scale_shift": "default",
  "transformer_layers_per_block": 1,
  "upcast_attention": false,
  "use_linear_projection": false
}
depth_anything/__pycache__/blocks.cpython-39.pyc
ADDED
Binary file (3.22 kB).
depth_anything/__pycache__/dpt.cpython-39.pyc
ADDED
Binary file (5.03 kB).
depth_anything/blocks.py
ADDED
@@ -0,0 +1,153 @@
import torch.nn as nn


def _make_scratch(in_shape, out_shape, groups=1, expand=False):
    scratch = nn.Module()

    out_shape1 = out_shape
    out_shape2 = out_shape
    out_shape3 = out_shape
    if len(in_shape) >= 4:
        out_shape4 = out_shape

    if expand:
        out_shape1 = out_shape
        out_shape2 = out_shape * 2
        out_shape3 = out_shape * 4
        if len(in_shape) >= 4:
            out_shape4 = out_shape * 8

    scratch.layer1_rn = nn.Conv2d(
        in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer2_rn = nn.Conv2d(
        in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer3_rn = nn.Conv2d(
        in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    if len(in_shape) >= 4:
        scratch.layer4_rn = nn.Conv2d(
            in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
        )

    return scratch


class ResidualConvUnit(nn.Module):
    """Residual convolution module."""

    def __init__(self, features, activation, bn):
        """Init.

        Args:
            features (int): number of features
        """
        super().__init__()

        self.bn = bn

        self.groups = 1

        self.conv1 = nn.Conv2d(
            features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
        )

        self.conv2 = nn.Conv2d(
            features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
        )

        if self.bn == True:
            self.bn1 = nn.BatchNorm2d(features)
            self.bn2 = nn.BatchNorm2d(features)

        self.activation = activation

        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input

        Returns:
            tensor: output
        """

        out = self.activation(x)
        out = self.conv1(out)
        if self.bn == True:
            out = self.bn1(out)

        out = self.activation(out)
        out = self.conv2(out)
        if self.bn == True:
            out = self.bn2(out)

        if self.groups > 1:
            out = self.conv_merge(out)

        return self.skip_add.add(out, x)


class FeatureFusionBlock(nn.Module):
    """Feature fusion block."""

    def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None):
        """Init.

        Args:
            features (int): number of features
        """
        super(FeatureFusionBlock, self).__init__()

        self.deconv = deconv
        self.align_corners = align_corners

        self.groups = 1

        self.expand = expand
        out_features = features
        if self.expand == True:
            out_features = features // 2

        self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)

        self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
        self.resConfUnit2 = ResidualConvUnit(features, activation, bn)

        self.skip_add = nn.quantized.FloatFunctional()

        self.size = size

    def forward(self, *xs, size=None):
        """Forward pass.

        Returns:
            tensor: output
        """
        output = xs[0]

        if len(xs) == 2:
            res = self.resConfUnit1(xs[1])
            output = self.skip_add.add(output, res)

        output = self.resConfUnit2(output)

        if (size is None) and (self.size is None):
            modifier = {"scale_factor": 2}
        elif size is None:
            modifier = {"size": self.size}
        else:
            modifier = {"size": size}

        output = nn.functional.interpolate(
            output, **modifier, mode="bilinear", align_corners=self.align_corners
        )

        output = self.out_conv(output)

        return output
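As a quick aside (not part of the repository), the blocks above can be sanity-checked in isolation, wired the way `dpt.py` uses them: both fusion inputs arrive at the same resolution, and `size` is the resolution of the next, finer skip feature.

```python
# Shape-check sketch only; the channel widths mirror the vits configuration as an example.
import torch
import torch.nn as nn
from depth_anything.blocks import FeatureFusionBlock, _make_scratch

# per-level 3x3 convs mapping backbone channels onto a common width (here 64)
scratch = _make_scratch([48, 96, 192, 384], 64)
feat = scratch.layer1_rn(torch.randn(1, 48, 24, 24))   # -> (1, 64, 24, 24)

fusion = FeatureFusionBlock(64, nn.ReLU(False), bn=False, align_corners=True)
path = torch.randn(1, 64, 24, 24)                      # output of the coarser fusion stage
out = fusion(path, feat, size=(48, 48))                # add skip, refine, upsample to `size`
print(out.shape)                                       # torch.Size([1, 64, 48, 48])
```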
depth_anything/dpt.py
ADDED
@@ -0,0 +1,187 @@
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download

from depth_anything.blocks import FeatureFusionBlock, _make_scratch


def _make_fusion_block(features, use_bn, size=None):
    return FeatureFusionBlock(
        features,
        nn.ReLU(False),
        deconv=False,
        bn=use_bn,
        expand=False,
        align_corners=True,
        size=size,
    )


class DPTHead(nn.Module):
    def __init__(self, nclass, in_channels, features=256, use_bn=False, out_channels=[256, 512, 1024, 1024], use_clstoken=False):
        super(DPTHead, self).__init__()

        self.nclass = nclass
        self.use_clstoken = use_clstoken

        self.projects = nn.ModuleList([
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channel,
                kernel_size=1,
                stride=1,
                padding=0,
            ) for out_channel in out_channels
        ])

        self.resize_layers = nn.ModuleList([
            nn.ConvTranspose2d(
                in_channels=out_channels[0],
                out_channels=out_channels[0],
                kernel_size=4,
                stride=4,
                padding=0),
            nn.ConvTranspose2d(
                in_channels=out_channels[1],
                out_channels=out_channels[1],
                kernel_size=2,
                stride=2,
                padding=0),
            nn.Identity(),
            nn.Conv2d(
                in_channels=out_channels[3],
                out_channels=out_channels[3],
                kernel_size=3,
                stride=2,
                padding=1)
        ])

        if use_clstoken:
            self.readout_projects = nn.ModuleList()
            for _ in range(len(self.projects)):
                self.readout_projects.append(
                    nn.Sequential(
                        nn.Linear(2 * in_channels, in_channels),
                        nn.GELU()))

        self.scratch = _make_scratch(
            out_channels,
            features,
            groups=1,
            expand=False,
        )

        self.scratch.stem_transpose = None

        self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet4 = _make_fusion_block(features, use_bn)

        head_features_1 = features
        head_features_2 = 32

        if nclass > 1:
            self.scratch.output_conv = nn.Sequential(
                nn.Conv2d(head_features_1, head_features_1, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(head_features_1, nclass, kernel_size=1, stride=1, padding=0),
            )
        else:
            self.scratch.output_conv1 = nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1)

            self.scratch.output_conv2 = nn.Sequential(
                nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
                nn.ReLU(True),
                nn.Identity(),
            )

    def forward(self, out_features, patch_h, patch_w):
        out = []
        for i, x in enumerate(out_features):
            if self.use_clstoken:
                x, cls_token = x[0], x[1]
                readout = cls_token.unsqueeze(1).expand_as(x)
                x = self.readout_projects[i](torch.cat((x, readout), -1))
            else:
                x = x[0]

            x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))

            x = self.projects[i](x)
            x = self.resize_layers[i](x)

            out.append(x)

        layer_1, layer_2, layer_3, layer_4 = out

        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        out = self.scratch.output_conv1(path_1)
        out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
        out = self.scratch.output_conv2(out)

        return out


class DPT_DINOv2(nn.Module):
    def __init__(self, encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024], use_bn=False, use_clstoken=False, localhub=True):
        super(DPT_DINOv2, self).__init__()

        assert encoder in ['vits', 'vitb', 'vitl']

        # in case the Internet connection is not stable, please load the DINOv2 locally
        if localhub:
            self.pretrained = torch.hub.load('torchhub/facebookresearch_dinov2_main', 'dinov2_{:}14'.format(encoder), source='local', pretrained=False)
        else:
            self.pretrained = torch.hub.load('facebookresearch/dinov2', 'dinov2_{:}14'.format(encoder))

        dim = self.pretrained.blocks[0].attn.qkv.in_features

        self.depth_head = DPTHead(1, dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken)

    def forward(self, x):
        h, w = x.shape[-2:]

        features = self.pretrained.get_intermediate_layers(x, 4, return_class_token=True)

        patch_h, patch_w = h // 14, w // 14

        depth = self.depth_head(features, patch_h, patch_w)
        depth = F.interpolate(depth, size=(h, w), mode="bilinear", align_corners=True)
        depth = F.relu(depth)

        return depth.squeeze(1)


class DepthAnything(DPT_DINOv2, PyTorchModelHubMixin):
    def __init__(self, config):
        super().__init__(**config)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder",
        default="vits",
        type=str,
        choices=["vits", "vitb", "vitl"],
    )
    args = parser.parse_args()

    model = DepthAnything.from_pretrained("LiheYoung/depth_anything_{:}14".format(args.encoder))

    print(model)
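As an illustration (again not part of the repository), `DPTHead` alone can be exercised with dummy DINOv2-style token/CLS pairs; this makes the `patch_h`/`patch_w` reshaping and the final 14x upsampling visible without downloading the backbone. The dimensions below mirror the `vits` configuration as an example, not a fixed requirement.

```python
# Shape-check sketch under assumed vits-like dimensions.
import torch
from depth_anything.dpt import DPTHead

dim, patch_h, patch_w = 384, 16, 16   # ViT-S embedding dim; 16x16 patches ~ a 224x224 input
head = DPTHead(nclass=1, in_channels=dim, features=64,
               out_channels=[48, 96, 192, 384], use_clstoken=False)

# get_intermediate_layers(..., return_class_token=True) yields (tokens, cls_token) pairs;
# with use_clstoken=False only the tokens are used
feats = [(torch.randn(1, patch_h * patch_w, dim), torch.randn(1, dim)) for _ in range(4)]
depth = head(feats, patch_h, patch_w)
print(depth.shape)   # torch.Size([1, 1, 224, 224]) == (1, 1, patch_h * 14, patch_w * 14)
```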
depth_anything/util/__pycache__/transform.cpython-39.pyc
ADDED
Binary file (6.07 kB).
depth_anything/util/transform.py
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import random
from PIL import Image, ImageOps, ImageFilter
import torch
from torchvision import transforms
import torch.nn.functional as F

import numpy as np
import cv2
import math


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    Args:
        sample (dict): sample
        size (tuple): image size

    Returns:
        tuple: new size
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        return sample

    scale = [0, 0]
    scale[0] = size[0] / shape[0]
    scale[1] = size[1] / shape[1]

    scale = max(scale)

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # resize
    sample["image"] = cv2.resize(
        sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
    )

    sample["disparity"] = cv2.resize(
        sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
    )
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        tuple(shape[::-1]),
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)


class Resize(object):
    """Resize sample to given size (width, height)."""

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height are constrained to be multiples of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at most as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
                )

            if "semseg_mask" in sample:
                # sample["semseg_mask"] = cv2.resize(
                #     sample["semseg_mask"], (width, height), interpolation=cv2.INTER_NEAREST
                # )
                sample["semseg_mask"] = F.interpolate(torch.from_numpy(sample["semseg_mask"]).float()[None, None, ...], (height, width), mode='nearest').numpy()[0, 0]

            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                # sample["mask"] = sample["mask"].astype(bool)

        # print(sample['image'].shape, sample['depth'].shape)
        return sample


class NormalizeImage(object):
    """Normalize image by given mean and std."""

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input."""

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        if "semseg_mask" in sample:
            sample["semseg_mask"] = sample["semseg_mask"].astype(np.float32)
            sample["semseg_mask"] = np.ascontiguousarray(sample["semseg_mask"])

        return sample

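As a quick illustration of the `lower_bound` policy above, here is a small sketch with an assumed 1920×1080 input and the 518/multiple-of-14 configuration used elsewhere in this upload: both scale factors are computed, the larger one is kept, and the result is snapped to multiples of 14.

```python
from depth_anything.util.transform import Resize

resize = Resize(width=518, height=518, keep_aspect_ratio=True,
                ensure_multiple_of=14, resize_method="lower_bound")

# 518 / 1080 ≈ 0.480 is the larger scale, so the height lands exactly on 518,
# while the width becomes round(0.480 * 1920 / 14) * 14 = 924.
print(resize.get_size(1920, 1080))  # -> (924, 518)
```
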
depthanything_server.py
ADDED
@@ -0,0 +1,61 @@
import gradio as gr
import cv2
import numpy as np
import os
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision.transforms import Compose

from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet


transform = Compose([
    Resize(
        width=518,
        height=518,
        resize_target=False,
        keep_aspect_ratio=True,
        ensure_multiple_of=14,
        resize_method='lower_bound',
        image_interpolation_method=cv2.INTER_CUBIC,
    ),
    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    PrepareForNet(),
])

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()


def predict_depthmap(image):
    original_image = image.copy()

    h, w = image.shape[:2]

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
    image = transform({'image': image})['image']
    image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        depth = model(image)
    depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
    depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
    depth = depth.cpu().numpy().astype(np.uint8)
    colored_depth = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]

    # Convert to a PIL image so the return value matches gr.Image(type="pil")
    colored_depth = Image.fromarray(colored_depth)

    return colored_depth


demo = gr.Interface(fn=predict_depthmap, inputs=[gr.Image()],
                    outputs=[gr.Image(type="pil")]
                    )

demo.launch(share=True, server_name="localhost", server_port=8082)

gallery.md
ADDED
@@ -0,0 +1,160 @@
# $Depth$ $Anything$ ${\color{crimson}G\color{coral}a\color{royalblue}l\color{olive}l\color{teal}e\color{navy}r\color{plum}y}$

Here we exhibit awesome community showcases of Depth Anything. We thank all the users for sharing them on the Internet (mainly from Twitter).

We organize these cases into three groups: [**image**](#image), [**video**](#video), and [**3D**](#3d).

## Image

You can click on the titles below to be directed to the corresponding source pages.

### [Monument Valley](https://twitter.com/weebney/status/1749541957108441309)

<img src="assets/gallery/monument_valley.jpg" width="60%"/>

### [Cyber rabbit monitoring screens](https://twitter.com/hayas1357/status/1749298607260316139)

<img src="assets/gallery/cyber_rabbit.jpg" width="60%"/>

### [Astronaut cat](https://twitter.com/nanase_ja/status/1749653152406884392)

<img src="assets/gallery/astronaut_cat.jpg" width="60%"/>

### [Animation images](https://twitter.com/PlayShingo/status/1750368475867128200)

<img src="assets/gallery/animation_image.jpg" width="90%"/>

### [DALL·E bear](https://twitter.com/letalvoj/status/1749341999646347741)

<img src="assets/gallery/dalle_bear.jpg" width="60%"/>

### [Cat](https://twitter.com/sajilobroker/status/1749364184419016846)

<img src="assets/gallery/cat.jpg" width="60%"/>

### [Surprised bald man](https://twitter.com/mayfer/status/1749712454408679780)

<img src="assets/gallery/surprised_bald_man.jpg" width="60%"/>

### [Minecraft](https://twitter.com/BarlowTwin/status/1749353070008693224)

<img src="assets/gallery/minecraft.jpg" width="90%"/>

### [Robotic knight amidst lightning](https://twitter.com/IterIntellectus/status/1749432836158021738)

<img src="assets/gallery/robotic_knight.jpg" width="45%"/>

### [Football game](https://twitter.com/AB9Mamun/status/1751202608545456235)

<img src="assets/gallery/football_game.jpg" width="60%"/>

### [Classical raft painting](https://twitter.com/acidbjazz/status/1749491155698331774)

<img src="assets/gallery/raft_painting.jpg" width="60%"/>

### [Diner scene](https://twitter.com/R0b0tSp1der/status/1749301061964435846)

<img src="assets/gallery/diner_scene.jpg" width="60%"/>

### [Elon Musk](https://twitter.com/ai_for_success/status/1749304903418482954)

<img src="assets/gallery/elon_musk.jpg" width="60%"/>

### [Painted tunnel](https://twitter.com/NodiMend/status/1750800040304492814)

<img src="assets/gallery/painted_tunnel.jpg" width="40%"/>

### [Iron man](https://twitter.com/ai_for_success/status/1749304906664808751)

<img src="assets/gallery/iron_man.jpg" width="60%"/>

### [Skull](https://twitter.com/ai_for_success/status/1749304909730906381)

<img src="assets/gallery/skull.jpg" width="60%"/>

### [Chibi cat-eared character](https://twitter.com/nanase_ja/status/1749484958522204605)

<img src="assets/gallery/chibi_cateared_character.jpg" width="60%"/>

### [Exuberant gamer celebration](https://twitter.com/hmaon/status/1749372352016625748)

<img src="assets/gallery/gamer_celebration.jpg" width="60%"/>

### [Ocean](https://twitter.com/jarrahorphin/status/1749878678111309870)

<img src="assets/gallery/ocean.jpg" width="60%"/>

### [Aerial images](https://twitter.com/lTlanual/status/1749641678124892384)

<img src="assets/gallery/aerial_image.jpg" width="60%"/>

### [Grilled chicken skewers](https://twitter.com/promptlord/status/1752323556409856157)

<img src="assets/gallery/grilled_chicken_skewers.jpg" width="60%"/>

### [Artistic images](https://twitter.com/ZainHasan6/status/1753553755998416933)

<img src="assets/gallery/artistic_image.jpg" width="90%"/>

### [Iconic distracted man](https://twitter.com/ZainHasan6/status/1749308193237303620)

<img src="assets/gallery/distracted_man.jpg" width="60%"/>

### [Eye-stalked](https://twitter.com/RJdoesVR/status/1749494967800590780)

<img src="assets/gallery/eye-stalked.jpg" width="60%"/>

### [Tearful green frog](https://twitter.com/qsdnl/status/1749298425064313080)

<img src="assets/gallery/tearful_green_frog.jpg" width="60%"/>


## Video

For more online showcases, please refer to https://twitter.com/WilliamLamkin/status/1755623301907460582.

The videos below may be slow to load. Please wait a moment.

### [Racing game](https://twitter.com/i/status/1750683014152040853)

<img src="assets/gallery/racing_car.gif" width="80%"/>

### [Building](https://twitter.com/WayneINR/status/1750945037863551247)

<img src="assets/gallery/building.gif" width="80%"/>

### [nuScenes](https://github.com/scepter914/DepthAnything-ROS)

<img src="assets/gallery/nuscenes.gif" width="80%"/>

### [Indoor moving](https://twitter.com/PINTO03091/status/1750162506453041437)

<img src="assets/gallery/indoor_moving.gif" width="40%"/>


## 3D

The videos below may be slow to load. Please wait a moment.

### [3D visualization](https://twitter.com/victormustar/status/1753008143469093212)

<img src="assets/gallery/3d_vis1.gif" width="50%"/><br><br>
<img src="assets/gallery/3d_vis2.gif" width="50%"/>

### [2D videos to 3D videos](https://twitter.com/stspanho/status/1751709292913143895)

<img src="assets/gallery/3d_video.gif" width="60%"/>

### Reconstruction

- [case1](https://twitter.com/Artoid_XYZ/status/1751542601772421378)

<img src="assets/gallery/reconstruction2.jpeg" width="60%"/>

- [case2](https://twitter.com/DennisLoevlie/status/1753846358463709489)

<img src="assets/gallery/reconstruction.jpg" width="60%"/>

metric_depth/README.md
ADDED
@@ -0,0 +1,89 @@
# Depth Anything for Metric Depth Estimation

Our Depth Anything models primarily focus on robust *relative* depth estimation. To achieve *metric* depth estimation, we follow ZoeDepth and fine-tune our Depth Anything pre-trained encoder with metric depth information from NYUv2 or KITTI.


## Performance

### *In-domain* metric depth estimation

#### NYUv2

| Method | $\delta_1 \uparrow$ | $\delta_2 \uparrow$ | $\delta_3 \uparrow$ | AbsRel $\downarrow$ | RMSE $\downarrow$ | log10 $\downarrow$ |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
| ZoeDepth | 0.951 | 0.994 | 0.999 | 0.077 | 0.282 | 0.033 |
| Depth Anything | **0.984** | **0.998** | **1.000** | **0.056** | **0.206** | **0.024** |


#### KITTI

| Method | $\delta_1 \uparrow$ | $\delta_2 \uparrow$ | $\delta_3 \uparrow$ | AbsRel $\downarrow$ | RMSE $\downarrow$ | log10 $\downarrow$ |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
| ZoeDepth | 0.971 | 0.996 | 0.999 | 0.054 | 2.281 | 0.082 |
| Depth Anything | **0.982** | **0.998** | **1.000** | **0.046** | **1.896** | **0.069** |


### *Zero-shot* metric depth estimation

Indoor: NYUv2 $\rightarrow$ SUN RGB-D, iBims-1, and HyperSim<br>
Outdoor: KITTI $\rightarrow$ Virtual KITTI 2 and DIODE Outdoor


| Method | SUN || iBims || HyperSim || vKITTI || DIODE Outdoor ||
|-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
| | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ | AbsRel | $\delta_1$ |
| ZoeDepth | 0.520 | 0.545 | 0.169 | 0.656 | 0.407 | 0.302 | 0.106 | 0.844 | 0.814 | 0.237 |
| Depth Anything | **0.500** | **0.660** | **0.150** | **0.714** | **0.363** | **0.361** | **0.085** | **0.913** | **0.794** | **0.288** |

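For reference, the metrics in the tables above are the standard monocular-depth evaluation measures (stated here for convenience; the exact implementation lives in `zoedepth.utils.misc.compute_metrics`, used by `evaluate.py` in this upload). With $d_i$ the ground-truth and $\hat{d}_i$ the predicted depth over $N$ valid pixels:

$$
\mathrm{AbsRel} = \frac{1}{N}\sum_{i=1}^{N}\frac{|d_i-\hat{d}_i|}{d_i},\qquad
\mathrm{RMSE} = \sqrt{\frac{1}{N}\sum_{i=1}^{N}(d_i-\hat{d}_i)^2},\qquad
\mathrm{log10} = \frac{1}{N}\sum_{i=1}^{N}\big|\log_{10} d_i-\log_{10}\hat{d}_i\big|,
$$

$$
\delta_j = \frac{1}{N}\,\Big|\Big\{\, i : \max\!\Big(\tfrac{d_i}{\hat{d}_i},\tfrac{\hat{d}_i}{d_i}\Big) < 1.25^{\,j} \,\Big\}\Big|, \quad j \in \{1,2,3\}.
$$
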
## Pre-trained metric depth estimation models

We provide [two pre-trained models](https://huggingface.co/spaces/LiheYoung/Depth-Anything/tree/main/checkpoints_metric_depth): one for *indoor* metric depth estimation trained on NYUv2, and the other for *outdoor* metric depth estimation trained on KITTI.

## Installation

```bash
conda env create -n depth_anything_metric --file environment.yml
conda activate depth_anything_metric
```

Please follow [ZoeDepth](https://github.com/isl-org/ZoeDepth) to prepare the training and test datasets.

## Evaluation

Make sure you have downloaded our pre-trained metric-depth models [here](https://huggingface.co/spaces/LiheYoung/Depth-Anything/tree/main/checkpoints_metric_depth) (for evaluation) and our pre-trained relative-depth model [here](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitl14.pth) (for initializing the encoder), and put them under the ``checkpoints`` directory.

Indoor:
```bash
python evaluate.py -m zoedepth --pretrained_resource="local::./checkpoints/depth_anything_metric_depth_indoor.pt" -d <nyu | sunrgbd | ibims | hypersim_test>
```

Outdoor:
```bash
python evaluate.py -m zoedepth --pretrained_resource="local::./checkpoints/depth_anything_metric_depth_outdoor.pt" -d <kitti | vkitti2 | diode_outdoor>
```

## Training

Please first download our Depth Anything pre-trained model [here](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitl14.pth), and put it under the ``checkpoints`` directory.

```bash
python train_mono.py -m zoedepth -d <nyu | kitti> --pretrained_resource=""
```

This will automatically use our Depth Anything pre-trained ViT-L encoder.

## Citation

If you find this project useful, please consider citing:

```bibtex
@article{depthanything,
  title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data},
  author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
  journal={arXiv:2401.10891},
  year={2024}
}
```

metric_depth/depth_to_pointcloud.py
ADDED
@@ -0,0 +1,79 @@
# Born out of Issue 36.
# Allows the user to set up their own test files to infer on. (Create a folder my_test with
# subfolders input and output in the metric_depth directory before running this script.)
# Make sure you have the necessary libraries installed.
# Code by @1ssb

import argparse
import os
import glob
import torch
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import open3d as o3d
from tqdm import tqdm
from zoedepth.models.builder import build_model
from zoedepth.utils.config import get_config

# Global settings
FL = 715.0873
FY = 256 * 0.6
FX = 256 * 0.6
NYU_DATA = False
FINAL_HEIGHT = 256
FINAL_WIDTH = 256
INPUT_DIR = './my_test/input'
OUTPUT_DIR = './my_test/output'
DATASET = 'nyu'  # Let's not pick a fight with the model's dataloader

def process_images(model):
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    image_paths = glob.glob(os.path.join(INPUT_DIR, '*.png')) + glob.glob(os.path.join(INPUT_DIR, '*.jpg'))
    for image_path in tqdm(image_paths, desc="Processing Images"):
        try:
            color_image = Image.open(image_path).convert('RGB')
            original_width, original_height = color_image.size
            image_tensor = transforms.ToTensor()(color_image).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')

            pred = model(image_tensor, dataset=DATASET)
            if isinstance(pred, dict):
                pred = pred.get('metric_depth', pred.get('out'))
            elif isinstance(pred, (list, tuple)):
                pred = pred[-1]
            pred = pred.squeeze().detach().cpu().numpy()

            # Resize color image and depth to final size
            resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS)
            resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST)

            focal_length_x, focal_length_y = (FX, FY) if not NYU_DATA else (FL, FL)
            x, y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT))
            x = (x - FINAL_WIDTH / 2) / focal_length_x
            y = (y - FINAL_HEIGHT / 2) / focal_length_y
            z = np.array(resized_pred)
            points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
            colors = np.array(resized_color_image).reshape(-1, 3) / 255.0

            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(points)
            pcd.colors = o3d.utility.Vector3dVector(colors)
            o3d.io.write_point_cloud(os.path.join(OUTPUT_DIR, os.path.splitext(os.path.basename(image_path))[0] + ".ply"), pcd)
        except Exception as e:
            print(f"Error processing {image_path}: {e}")

def main(model_name, pretrained_resource):
    config = get_config(model_name, "eval", DATASET)
    config.pretrained_resource = pretrained_resource
    model = build_model(config).to('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    process_images(model)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default='zoedepth', help="Name of the model to test")
    parser.add_argument("-p", "--pretrained_resource", type=str, default='local::./checkpoints/depth_anything_metric_depth_indoor.pt', help="Pretrained resource to use for fetching weights.")

    args = parser.parse_args()
    main(args.model, args.pretrained_resource)

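The back-projection performed in `process_images` above is the usual pinhole unprojection with the principal point assumed at the image center: for a pixel $(u, v)$ with predicted metric depth $Z$,

$$
X = \frac{(u - W/2)\,Z}{f_x},\qquad Y = \frac{(v - H/2)\,Z}{f_y},\qquad Z = \mathrm{depth}(u, v),
$$

where $W \times H$ is the resized output (256×256 here) and $f_x, f_y$ are the focal lengths from the global settings.
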
metric_depth/environment.yml
ADDED
@@ -0,0 +1,26 @@
name: zoe
channels:
  - pytorch
  - nvidia
  - conda-forge
dependencies:
  - cuda=11.7.1
  - h5py=3.7.0
  - hdf5=1.12.2
  - matplotlib=3.6.2
  - matplotlib-base=3.6.2
  - numpy=1.24.1
  - opencv=4.6.0
  - pip=22.3.1
  - python=3.9.7
  - pytorch=1.13.1
  - pytorch-cuda=11.7
  - pytorch-mutex=1.0
  - scipy=1.10.0
  - torchaudio=0.13.1
  - torchvision=0.14.1
  - pip:
      - huggingface-hub==0.11.1
      - timm==0.6.12
      - tqdm==4.64.1
      - wandb==0.13.9

metric_depth/evaluate.py
ADDED
@@ -0,0 +1,160 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import argparse
from pprint import pprint

import torch
from zoedepth.utils.easydict import EasyDict as edict
from tqdm import tqdm

from zoedepth.data.data_mono import DepthDataLoader
from zoedepth.models.builder import build_model
from zoedepth.utils.arg_utils import parse_unknown
from zoedepth.utils.config import change_dataset, get_config, ALL_EVAL_DATASETS, ALL_INDOOR, ALL_OUTDOOR
from zoedepth.utils.misc import (RunningAverageDict, colors, compute_metrics,
                                 count_parameters)


@torch.no_grad()
def infer(model, images, **kwargs):
    """Inference with flip augmentation"""
    # images.shape = N, C, H, W
    def get_depth_from_prediction(pred):
        if isinstance(pred, torch.Tensor):
            pred = pred  # pass
        elif isinstance(pred, (list, tuple)):
            pred = pred[-1]
        elif isinstance(pred, dict):
            pred = pred['metric_depth'] if 'metric_depth' in pred else pred['out']
        else:
            raise NotImplementedError(f"Unknown output type {type(pred)}")
        return pred

    pred1 = model(images, **kwargs)
    pred1 = get_depth_from_prediction(pred1)

    pred2 = model(torch.flip(images, [3]), **kwargs)
    pred2 = get_depth_from_prediction(pred2)
    pred2 = torch.flip(pred2, [3])

    mean_pred = 0.5 * (pred1 + pred2)

    return mean_pred


@torch.no_grad()
def evaluate(model, test_loader, config, round_vals=True, round_precision=3):
    model.eval()
    metrics = RunningAverageDict()
    for i, sample in tqdm(enumerate(test_loader), total=len(test_loader)):
        if 'has_valid_depth' in sample:
            if not sample['has_valid_depth']:
                continue
        image, depth = sample['image'], sample['depth']
        image, depth = image.cuda(), depth.cuda()
        depth = depth.squeeze().unsqueeze(0).unsqueeze(0)
        focal = sample.get('focal', torch.Tensor(
            [715.0873]).cuda())  # This magic number (focal) is only used for evaluating the BTS model
        pred = infer(model, image, dataset=sample['dataset'][0], focal=focal)

        # Save image, depth, pred for visualization
        if "save_images" in config and config.save_images:
            import os
            # print("Saving images ...")
            from PIL import Image
            import torchvision.transforms as transforms
            from zoedepth.utils.misc import colorize

            os.makedirs(config.save_images, exist_ok=True)
            # def save_image(img, path):
            d = colorize(depth.squeeze().cpu().numpy(), 0, 10)
            p = colorize(pred.squeeze().cpu().numpy(), 0, 10)
            im = transforms.ToPILImage()(image.squeeze().cpu())
            im.save(os.path.join(config.save_images, f"{i}_img.png"))
            Image.fromarray(d).save(os.path.join(config.save_images, f"{i}_depth.png"))
            Image.fromarray(p).save(os.path.join(config.save_images, f"{i}_pred.png"))

        # print(depth.shape, pred.shape)
        metrics.update(compute_metrics(depth, pred, config=config))

    if round_vals:
        def r(m): return round(m, round_precision)
    else:
        def r(m): return m
    metrics = {k: r(v) for k, v in metrics.get_value().items()}
    return metrics

def main(config):
    model = build_model(config)
    test_loader = DepthDataLoader(config, 'online_eval').data
    model = model.cuda()
    metrics = evaluate(model, test_loader, config)
    print(f"{colors.fg.green}")
    print(metrics)
    print(f"{colors.reset}")
    metrics['#params'] = f"{round(count_parameters(model, include_all=True)/1e6, 2)}M"
    return metrics


def eval_model(model_name, pretrained_resource, dataset='nyu', **kwargs):

    # Load default pretrained resource defined in config if not set
    overwrite = {**kwargs, "pretrained_resource": pretrained_resource} if pretrained_resource else kwargs
    config = get_config(model_name, "eval", dataset, **overwrite)
    # config = change_dataset(config, dataset)  # change the dataset
    pprint(config)
    print(f"Evaluating {model_name} on {dataset}...")
    metrics = main(config)
    return metrics


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str,
                        required=True, help="Name of the model to evaluate")
    parser.add_argument("-p", "--pretrained_resource", type=str,
                        required=False, default="", help="Pretrained resource to use for fetching weights. If not set, the default resource from the model config is used. Refer to models.model_io.load_state_from_resource for more details.")
    parser.add_argument("-d", "--dataset", type=str, required=False,
                        default='nyu', help="Dataset to evaluate on")

    args, unknown_args = parser.parse_known_args()
    overwrite_kwargs = parse_unknown(unknown_args)

    if "ALL_INDOOR" in args.dataset:
        datasets = ALL_INDOOR
    elif "ALL_OUTDOOR" in args.dataset:
        datasets = ALL_OUTDOOR
    elif "ALL" in args.dataset:
        datasets = ALL_EVAL_DATASETS
    elif "," in args.dataset:
        datasets = args.dataset.split(",")
    else:
        datasets = [args.dataset]

    for dataset in datasets:
        eval_model(args.model, pretrained_resource=args.pretrained_resource,
                   dataset=dataset, **overwrite_kwargs)

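The `infer` helper above applies test-time horizontal-flip augmentation; the reported prediction is simply

$$
\hat{d}(x) = \tfrac{1}{2}\Big(f(x) + \mathrm{flip}\big(f(\mathrm{flip}(x))\big)\Big),
$$

where $\mathrm{flip}$ mirrors the last (width) dimension, matching `torch.flip(images, [3])` in the code.
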
metric_depth/point_cloud_on_trackbar.py
ADDED
@@ -0,0 +1,168 @@
"""
Born out of Depth Anything V2
Make sure you have the necessary libraries installed.
Code by @1ssb

This script processes a video to generate depth maps and corresponding point clouds for each frame.
The resulting depth maps are saved in a video format, and the point clouds can be interactively generated for selected frames.

Usage:
python script.py --video-path path_to_video --input-size 518 --outdir output_directory --encoder vitl --focal-length-x 470.4 --focal-length-y 470.4 --pred-only --grayscale

Arguments:
--video-path: Path to the input video.
--input-size: Size to which the input frame is resized for depth prediction.
--outdir: Directory to save the output video and point clouds.
--encoder: Model encoder to use. Choices are ['vits', 'vitb', 'vitl', 'vitg'].
--focal-length-x: Focal length along the x-axis.
--focal-length-y: Focal length along the y-axis.
--pred-only: Only display the prediction without the original frame.
--grayscale: Do not apply colorful palette to the depth map.
"""

import argparse
import cv2
import glob
import matplotlib
import numpy as np
import os
import torch
import open3d as o3d

from depth_anything_v2.dpt import DepthAnythingV2


def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Depth Anything V2 with Point Cloud Generation')
    parser.add_argument('--video-path', type=str, required=True, help='Path to the input video.')
    parser.add_argument('--input-size', type=int, default=518, help='Size to which the input frame is resized for depth prediction.')
    parser.add_argument('--outdir', type=str, default='./vis_video_depth', help='Directory to save the output video and point clouds.')
    parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg'], help='Model encoder to use.')
    parser.add_argument('--focal-length-x', default=470.4, type=float, help='Focal length along the x-axis.')
    parser.add_argument('--focal-length-y', default=470.4, type=float, help='Focal length along the y-axis.')
    parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='Only display the prediction.')
    parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='Do not apply colorful palette.')

    args = parser.parse_args()

    # Determine the device to use (CUDA, MPS, or CPU)
    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

    # Model configuration based on the chosen encoder
    model_configs = {
        'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
        'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
        'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
        'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
    }

    # Initialize the DepthAnythingV2 model with the specified configuration
    depth_anything = DepthAnythingV2(**model_configs[args.encoder])
    depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
    depth_anything = depth_anything.to(DEVICE).eval()

    # Get the list of video files to process
    if os.path.isfile(args.video_path):
        if args.video_path.endswith('txt'):
            with open(args.video_path, 'r') as f:
                # One video path per line
                filenames = f.read().splitlines()
        else:
            filenames = [args.video_path]
    else:
        filenames = glob.glob(os.path.join(args.video_path, '**/*'), recursive=True)

    # Create the output directory if it doesn't exist
    os.makedirs(args.outdir, exist_ok=True)

    margin_width = 50
    cmap = matplotlib.colormaps.get_cmap('Spectral_r')

    for k, filename in enumerate(filenames):
        print(f'Processing {k+1}/{len(filenames)}: {filename}')

        raw_video = cv2.VideoCapture(filename)
        frame_width, frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_rate = int(raw_video.get(cv2.CAP_PROP_FPS))

        if args.pred_only:
            output_width = frame_width
        else:
            output_width = frame_width * 2 + margin_width

        output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.mp4')
        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (output_width, frame_height))

        frame_index = 0
        frame_data = []

        while raw_video.isOpened():
            ret, raw_frame = raw_video.read()
            if not ret:
                break

            depth = depth_anything.infer_image(raw_frame, args.input_size)

            depth_normalized = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
            depth_normalized = depth_normalized.astype(np.uint8)

            if args.grayscale:
                depth_colored = np.repeat(depth_normalized[..., np.newaxis], 3, axis=-1)
            else:
                depth_colored = (cmap(depth_normalized)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8)

            if args.pred_only:
                out.write(depth_colored)
            else:
                split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255
                combined_frame = cv2.hconcat([raw_frame, split_region, depth_colored])
                out.write(combined_frame)

            frame_data.append((raw_frame, depth, depth_colored))
            frame_index += 1

        raw_video.release()
        out.release()

        # Function to create point cloud from depth map
        def create_point_cloud(raw_frame, depth_map, frame_index):
            height, width = raw_frame.shape[:2]
            focal_length_x = args.focal_length_x
            focal_length_y = args.focal_length_y

            x, y = np.meshgrid(np.arange(width), np.arange(height))
            x = (x - width / 2) / focal_length_x
            y = (y - height / 2) / focal_length_y
            z = np.array(depth_map)

            points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
            colors = raw_frame.reshape(-1, 3) / 255.0

            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(points)
            pcd.colors = o3d.utility.Vector3dVector(colors)

            pcd_path = os.path.join(args.outdir, f'frame_{frame_index}_point_cloud.ply')
            o3d.io.write_point_cloud(pcd_path, pcd)
            print(f'Point cloud saved to {pcd_path}')

        # Interactive window to select a frame and generate its point cloud
        def on_trackbar(val):
            frame_index = val
            raw_frame, depth_map, _ = frame_data[frame_index]
            create_point_cloud(raw_frame, depth_map, frame_index)

        if frame_data:
            cv2.namedWindow('Select Frame for Point Cloud')
            cv2.createTrackbar('Frame', 'Select Frame for Point Cloud', 0, frame_index - 1, on_trackbar)

            while True:
                key = cv2.waitKey(1) & 0xFF
                if key == 27:  # Esc key to exit
                    break

            cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

metric_depth/train_mix.py
ADDED
@@ -0,0 +1,182 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

from zoedepth.utils.misc import count_parameters, parallelize
from zoedepth.utils.config import get_config
from zoedepth.utils.arg_utils import parse_unknown
from zoedepth.trainers.builder import get_trainer
from zoedepth.models.builder import build_model
from zoedepth.data.data_mono import MixedNYUKITTI
import torch.utils.data.distributed
import torch.multiprocessing as mp
import torch
import numpy as np
from pprint import pprint
import argparse
import os

os.environ["PYOPENGL_PLATFORM"] = "egl"
os.environ["WANDB_START_METHOD"] = "thread"


def fix_random_seed(seed: int):
    """
    Fix random seed for reproducibility

    Args:
        seed (int): random seed
    """
    import random

    import numpy
    import torch

    random.seed(seed)
    numpy.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"):
    import glob
    import os

    from zoedepth.models.model_io import load_wts

    if hasattr(config, "checkpoint"):
        checkpoint = config.checkpoint
    elif hasattr(config, "ckpt_pattern"):
        pattern = config.ckpt_pattern
        matches = glob.glob(os.path.join(
            checkpoint_dir, f"*{pattern}*{ckpt_type}*"))
        if not (len(matches) > 0):
            raise ValueError(f"No matches found for the pattern {pattern}")

        checkpoint = matches[0]

    else:
        return model
    model = load_wts(model, checkpoint)
    print("Loaded weights from {0}".format(checkpoint))
    return model


def main_worker(gpu, ngpus_per_node, config):
    try:
        fix_random_seed(43)

        config.gpu = gpu

        model = build_model(config)

        # print(model)

        model = load_ckpt(config, model)
        model = parallelize(config, model)

        total_params = f"{round(count_parameters(model)/1e6,2)}M"
        config.total_params = total_params
        print(f"Total parameters : {total_params}")

        train_loader = MixedNYUKITTI(config, "train").data
        test_loader = MixedNYUKITTI(config, "online_eval").data

        trainer = get_trainer(config)(
            config, model, train_loader, test_loader, device=config.gpu)

        trainer.train()
    finally:
        import wandb
        wandb.finish()


if __name__ == '__main__':
    mp.set_start_method('forkserver')

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default="synunet")
    parser.add_argument("-d", "--dataset", type=str, default='mix')
    parser.add_argument("--trainer", type=str, default=None)

    args, unknown_args = parser.parse_known_args()
    overwrite_kwargs = parse_unknown(unknown_args)

    overwrite_kwargs["model"] = args.model
    if args.trainer is not None:
        overwrite_kwargs["trainer"] = args.trainer

    config = get_config(args.model, "train", args.dataset, **overwrite_kwargs)
    # git_commit()
    if config.use_shared_dict:
        shared_dict = mp.Manager().dict()
    else:
        shared_dict = None
    config.shared_dict = shared_dict

    config.batch_size = config.bs
    config.mode = 'train'
    if config.root != "." and not os.path.isdir(config.root):
        os.makedirs(config.root)

    try:
        node_str = os.environ['SLURM_JOB_NODELIST'].replace(
            '[', '').replace(']', '')
        nodes = node_str.split(',')

        config.world_size = len(nodes)
        config.rank = int(os.environ['SLURM_PROCID'])
        # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints"

    except KeyError as e:
        # We are NOT using SLURM
        config.world_size = 1
        config.rank = 0
        nodes = ["127.0.0.1"]

    if config.distributed:

        print(config.rank)
        port = np.random.randint(15000, 15025)
        config.dist_url = 'tcp://{}:{}'.format(nodes[0], port)
        print(config.dist_url)
        config.dist_backend = 'nccl'
        config.gpu = None

    ngpus_per_node = torch.cuda.device_count()
    config.num_workers = config.workers
    config.ngpus_per_node = ngpus_per_node
    print("Config:")
    pprint(config)
    if config.distributed:
        config.world_size = ngpus_per_node * config.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, config))
    else:
        if ngpus_per_node == 1:
            config.gpu = 0
        main_worker(config.gpu, ngpus_per_node, config)

metric_depth/train_mono.py
ADDED
@@ -0,0 +1,176 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
from zoedepth.utils.misc import count_parameters, parallelize
|
from zoedepth.utils.config import get_config
from zoedepth.utils.arg_utils import parse_unknown
from zoedepth.trainers.builder import get_trainer
from zoedepth.models.builder import build_model
from zoedepth.data.data_mono import DepthDataLoader
import torch.utils.data.distributed
import torch.multiprocessing as mp
import torch
import numpy as np
from pprint import pprint
import argparse
import os

os.environ["PYOPENGL_PLATFORM"] = "egl"
os.environ["WANDB_START_METHOD"] = "thread"


def fix_random_seed(seed: int):
    import random

    import numpy
    import torch

    random.seed(seed)
    numpy.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"):
    import glob
    import os

    from zoedepth.models.model_io import load_wts

    if hasattr(config, "checkpoint"):
        checkpoint = config.checkpoint
    elif hasattr(config, "ckpt_pattern"):
        pattern = config.ckpt_pattern
        matches = glob.glob(os.path.join(
            checkpoint_dir, f"*{pattern}*{ckpt_type}*"))
        if not (len(matches) > 0):
            raise ValueError(f"No matches found for the pattern {pattern}")

        checkpoint = matches[0]

    else:
        return model
    model = load_wts(model, checkpoint)
    print("Loaded weights from {0}".format(checkpoint))
    return model


def main_worker(gpu, ngpus_per_node, config):
    try:
        seed = config.seed if 'seed' in config and config.seed else 43
        fix_random_seed(seed)

        config.gpu = gpu

        model = build_model(config)
        # print(model)

        model = load_ckpt(config, model)
        model = parallelize(config, model)

        total_params = f"{round(count_parameters(model)/1e6,2)}M"
        config.total_params = total_params
        print(f"Total parameters : {total_params}")

        train_loader = DepthDataLoader(config, "train").data
        test_loader = DepthDataLoader(config, "online_eval").data

        trainer = get_trainer(config)(
            config, model, train_loader, test_loader, device=config.gpu)

        trainer.train()
    finally:
        import wandb
        wandb.finish()


if __name__ == '__main__':
    mp.set_start_method('forkserver')

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default="synunet")
    parser.add_argument("-d", "--dataset", type=str, default='nyu')
    parser.add_argument("--trainer", type=str, default=None)

    args, unknown_args = parser.parse_known_args()
    overwrite_kwargs = parse_unknown(unknown_args)

    overwrite_kwargs["model"] = args.model
    if args.trainer is not None:
        overwrite_kwargs["trainer"] = args.trainer

    config = get_config(args.model, "train", args.dataset, **overwrite_kwargs)
    # git_commit()
    if config.use_shared_dict:
        shared_dict = mp.Manager().dict()
    else:
        shared_dict = None
    config.shared_dict = shared_dict

    config.batch_size = config.bs
    config.mode = 'train'
    if config.root != "." and not os.path.isdir(config.root):
        os.makedirs(config.root)

    try:
        node_str = os.environ['SLURM_JOB_NODELIST'].replace(
            '[', '').replace(']', '')
        nodes = node_str.split(',')

        config.world_size = len(nodes)
        config.rank = int(os.environ['SLURM_PROCID'])
        # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints"

    except KeyError as e:
        # We are NOT using SLURM
        config.world_size = 1
        config.rank = 0
        nodes = ["127.0.0.1"]

    if config.distributed:

        print(config.rank)
        port = np.random.randint(15000, 15025)
        config.dist_url = 'tcp://{}:{}'.format(nodes[0], port)
        print(config.dist_url)
        config.dist_backend = 'nccl'
        config.gpu = None

    ngpus_per_node = torch.cuda.device_count()
    config.num_workers = config.workers
    config.ngpus_per_node = ngpus_per_node
    print("Config:")
    pprint(config)
    if config.distributed:
        config.world_size = ngpus_per_node * config.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, config))
    else:
        if ngpus_per_node == 1:
            config.gpu = 0
        main_worker(config.gpu, ngpus_per_node, config)
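The block above is the tail of the metric-depth training entry point: parse_unknown turns every CLI flag that argparse does not recognize into a dict of config overrides, get_config merges those overrides into the training config, and main_worker is either spawned once per GPU via mp.spawn or called directly for single-GPU runs. The real override parser lives in zoedepth/utils/arg_utils.py, which this diff does not show; the snippet below is only a minimal sketch of what such a parser is assumed to do, and parse_unknown_sketch is a hypothetical name, not the repository's function.

# Hypothetical sketch of the override parsing used above (not the actual
# zoedepth.utils.arg_utils.parse_unknown implementation).
def parse_unknown_sketch(unknown_args):
    # Normalize "--key=value" tokens into separate "--key", "value" tokens.
    tokens = []
    for tok in unknown_args:
        if tok.startswith("--") and "=" in tok:
            tokens.extend(tok.split("=", 1))
        else:
            tokens.append(tok)
    # Pair up flags with their values and strip the leading dashes.
    return {key.lstrip("-"): value for key, value in zip(tokens[::2], tokens[1::2])}


# Example: extra flags such as "--bs 16 --lr=1e-4" become config overrides.
print(parse_unknown_sketch(["--bs", "16", "--lr=1e-4"]))  # {'bs': '16', 'lr': '1e-4'}

In the script above the resulting dict is passed as **overwrite_kwargs into get_config(args.model, "train", args.dataset, ...), so config fields can be changed from the command line without editing the config files themselves.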
metric_depth/train_test_inputs/kitti_eigen_test_files_with_gt.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
metric_depth/train_test_inputs/kitti_eigen_train_files_with_gt.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
metric_depth/train_test_inputs/nyudepthv2_test_files_with_gt.txt
ADDED
@@ -0,0 +1,654 @@
1 |
+
bathroom/rgb_00045.jpg bathroom/sync_depth_00045.png 518.8579
|
2 |
+
bathroom/rgb_00046.jpg bathroom/sync_depth_00046.png 518.8579
|
3 |
+
bathroom/rgb_00507.jpg bathroom/sync_depth_00507.png 518.8579
|
4 |
+
bathroom/rgb_00508.jpg bathroom/sync_depth_00508.png 518.8579
|
5 |
+
bathroom/rgb_00509.jpg bathroom/sync_depth_00509.png 518.8579
|
6 |
+
bathroom/rgb_00510.jpg bathroom/sync_depth_00510.png 518.8579
|
7 |
+
bathroom/rgb_00511.jpg bathroom/sync_depth_00511.png 518.8579
|
8 |
+
bathroom/rgb_00512.jpg bathroom/sync_depth_00512.png 518.8579
|
9 |
+
bathroom/rgb_00649.jpg bathroom/sync_depth_00649.png 518.8579
|
10 |
+
bathroom/rgb_00650.jpg bathroom/sync_depth_00650.png 518.8579
|
11 |
+
bathroom/rgb_00655.jpg bathroom/sync_depth_00655.png 518.8579
|
12 |
+
bathroom/rgb_00656.jpg bathroom/sync_depth_00656.png 518.8579
|
13 |
+
bathroom/rgb_00657.jpg bathroom/sync_depth_00657.png 518.8579
|
14 |
+
bathroom/rgb_00662.jpg bathroom/sync_depth_00662.png 518.8579
|
15 |
+
bathroom/rgb_00663.jpg bathroom/sync_depth_00663.png 518.8579
|
16 |
+
bathroom/rgb_00667.jpg bathroom/sync_depth_00667.png 518.8579
|
17 |
+
bathroom/rgb_00668.jpg bathroom/sync_depth_00668.png 518.8579
|
18 |
+
bathroom/rgb_00670.jpg bathroom/sync_depth_00670.png 518.8579
|
19 |
+
bathroom/rgb_00671.jpg bathroom/sync_depth_00671.png 518.8579
|
20 |
+
bathroom/rgb_00672.jpg bathroom/sync_depth_00672.png 518.8579
|
21 |
+
bathroom/rgb_00675.jpg bathroom/sync_depth_00675.png 518.8579
|
22 |
+
bathroom/rgb_00676.jpg bathroom/sync_depth_00676.png 518.8579
|
23 |
+
bathroom/rgb_00677.jpg bathroom/sync_depth_00677.png 518.8579
|
24 |
+
bathroom/rgb_00678.jpg bathroom/sync_depth_00678.png 518.8579
|
25 |
+
bathroom/rgb_00679.jpg bathroom/sync_depth_00679.png 518.8579
|
26 |
+
bathroom/rgb_00680.jpg bathroom/sync_depth_00680.png 518.8579
|
27 |
+
bathroom/rgb_00685.jpg bathroom/sync_depth_00685.png 518.8579
|
28 |
+
bathroom/rgb_00686.jpg bathroom/sync_depth_00686.png 518.8579
|
29 |
+
bathroom/rgb_00687.jpg bathroom/sync_depth_00687.png 518.8579
|
30 |
+
bathroom/rgb_00688.jpg bathroom/sync_depth_00688.png 518.8579
|
31 |
+
bathroom/rgb_00689.jpg bathroom/sync_depth_00689.png 518.8579
|
32 |
+
bathroom/rgb_00692.jpg bathroom/sync_depth_00692.png 518.8579
|
33 |
+
bathroom/rgb_00693.jpg bathroom/sync_depth_00693.png 518.8579
|
34 |
+
bathroom/rgb_00696.jpg bathroom/sync_depth_00696.png 518.8579
|
35 |
+
bathroom/rgb_00669.jpg bathroom/sync_depth_00669.png 518.8579
|
36 |
+
bathroom/rgb_00697.jpg bathroom/sync_depth_00697.png 518.8579
|
37 |
+
bathroom/rgb_00698.jpg bathroom/sync_depth_00698.png 518.8579
|
38 |
+
bathroom/rgb_00705.jpg bathroom/sync_depth_00705.png 518.8579
|
39 |
+
bathroom/rgb_00706.jpg bathroom/sync_depth_00706.png 518.8579
|
40 |
+
bathroom/rgb_00707.jpg bathroom/sync_depth_00707.png 518.8579
|
41 |
+
bathroom/rgb_00708.jpg bathroom/sync_depth_00708.png 518.8579
|
42 |
+
bathroom/rgb_00709.jpg bathroom/sync_depth_00709.png 518.8579
|
43 |
+
bathroom/rgb_00710.jpg bathroom/sync_depth_00710.png 518.8579
|
44 |
+
bathroom/rgb_00711.jpg bathroom/sync_depth_00711.png 518.8579
|
45 |
+
bathroom/rgb_00712.jpg bathroom/sync_depth_00712.png 518.8579
|
46 |
+
bathroom/rgb_00716.jpg bathroom/sync_depth_00716.png 518.8579
|
47 |
+
bathroom/rgb_00717.jpg bathroom/sync_depth_00717.png 518.8579
|
48 |
+
bathroom/rgb_00723.jpg bathroom/sync_depth_00723.png 518.8579
|
49 |
+
bathroom/rgb_00724.jpg bathroom/sync_depth_00724.png 518.8579
|
50 |
+
bathroom/rgb_00725.jpg bathroom/sync_depth_00725.png 518.8579
|
51 |
+
bathroom/rgb_00726.jpg bathroom/sync_depth_00726.png 518.8579
|
52 |
+
bathroom/rgb_00727.jpg bathroom/sync_depth_00727.png 518.8579
|
53 |
+
bathroom/rgb_00730.jpg bathroom/sync_depth_00730.png 518.8579
|
54 |
+
bathroom/rgb_00731.jpg bathroom/sync_depth_00731.png 518.8579
|
55 |
+
bathroom/rgb_00732.jpg bathroom/sync_depth_00732.png 518.8579
|
56 |
+
bathroom/rgb_00733.jpg bathroom/sync_depth_00733.png 518.8579
|
57 |
+
bathroom/rgb_00742.jpg bathroom/sync_depth_00742.png 518.8579
|
58 |
+
bathroom/rgb_00743.jpg bathroom/sync_depth_00743.png 518.8579
|
59 |
+
bedroom/rgb_00055.jpg bedroom/sync_depth_00055.png 518.8579
|
60 |
+
bedroom/rgb_00056.jpg bedroom/sync_depth_00056.png 518.8579
|
61 |
+
bedroom/rgb_00058.jpg bedroom/sync_depth_00058.png 518.8579
|
62 |
+
bedroom/rgb_00059.jpg bedroom/sync_depth_00059.png 518.8579
|
63 |
+
bedroom/rgb_00060.jpg bedroom/sync_depth_00060.png 518.8579
|
64 |
+
bedroom/rgb_00061.jpg bedroom/sync_depth_00061.png 518.8579
|
65 |
+
bedroom/rgb_00062.jpg bedroom/sync_depth_00062.png 518.8579
|
66 |
+
bedroom/rgb_00075.jpg bedroom/sync_depth_00075.png 518.8579
|
67 |
+
bedroom/rgb_00076.jpg bedroom/sync_depth_00076.png 518.8579
|
68 |
+
bedroom/rgb_00077.jpg bedroom/sync_depth_00077.png 518.8579
|
69 |
+
bedroom/rgb_00078.jpg bedroom/sync_depth_00078.png 518.8579
|
70 |
+
bedroom/rgb_00170.jpg bedroom/sync_depth_00170.png 518.8579
|
71 |
+
bedroom/rgb_00171.jpg bedroom/sync_depth_00171.png 518.8579
|
72 |
+
bedroom/rgb_00172.jpg bedroom/sync_depth_00172.png 518.8579
|
73 |
+
bedroom/rgb_00173.jpg bedroom/sync_depth_00173.png 518.8579
|
74 |
+
bedroom/rgb_00174.jpg bedroom/sync_depth_00174.png 518.8579
|
75 |
+
bedroom/rgb_00175.jpg bedroom/sync_depth_00175.png 518.8579
|
76 |
+
bedroom/rgb_00180.jpg bedroom/sync_depth_00180.png 518.8579
|
77 |
+
bedroom/rgb_00181.jpg bedroom/sync_depth_00181.png 518.8579
|
78 |
+
bedroom/rgb_00182.jpg bedroom/sync_depth_00182.png 518.8579
|
79 |
+
bedroom/rgb_00183.jpg bedroom/sync_depth_00183.png 518.8579
|
80 |
+
bedroom/rgb_00184.jpg bedroom/sync_depth_00184.png 518.8579
|
81 |
+
bedroom/rgb_00185.jpg bedroom/sync_depth_00185.png 518.8579
|
82 |
+
bedroom/rgb_00186.jpg bedroom/sync_depth_00186.png 518.8579
|
83 |
+
bedroom/rgb_00187.jpg bedroom/sync_depth_00187.png 518.8579
|
84 |
+
bedroom/rgb_00188.jpg bedroom/sync_depth_00188.png 518.8579
|
85 |
+
bedroom/rgb_00189.jpg bedroom/sync_depth_00189.png 518.8579
|
86 |
+
bedroom/rgb_00190.jpg bedroom/sync_depth_00190.png 518.8579
|
87 |
+
bedroom/rgb_00191.jpg bedroom/sync_depth_00191.png 518.8579
|
88 |
+
bedroom/rgb_00192.jpg bedroom/sync_depth_00192.png 518.8579
|
89 |
+
bedroom/rgb_00219.jpg bedroom/sync_depth_00219.png 518.8579
|
90 |
+
bedroom/rgb_00220.jpg bedroom/sync_depth_00220.png 518.8579
|
91 |
+
bedroom/rgb_00221.jpg bedroom/sync_depth_00221.png 518.8579
|
92 |
+
bedroom/rgb_00279.jpg bedroom/sync_depth_00279.png 518.8579
|
93 |
+
bedroom/rgb_00179.jpg bedroom/sync_depth_00179.png 518.8579
|
94 |
+
bedroom/rgb_00280.jpg bedroom/sync_depth_00280.png 518.8579
|
95 |
+
bedroom/rgb_00536.jpg bedroom/sync_depth_00536.png 518.8579
|
96 |
+
bedroom/rgb_00960.jpg bedroom/sync_depth_00960.png 518.8579
|
97 |
+
bedroom/rgb_01000.jpg bedroom/sync_depth_01000.png 518.8579
|
98 |
+
bedroom/rgb_01052.jpg bedroom/sync_depth_01052.png 518.8579
|
99 |
+
bedroom/rgb_01092.jpg bedroom/sync_depth_01092.png 518.8579
|
100 |
+
bedroom/rgb_01122.jpg bedroom/sync_depth_01122.png 518.8579
|
101 |
+
bedroom/rgb_01150.jpg bedroom/sync_depth_01150.png 518.8579
|
102 |
+
bedroom/rgb_00281.jpg bedroom/sync_depth_00281.png 518.8579
|
103 |
+
bedroom/rgb_00282.jpg bedroom/sync_depth_00282.png 518.8579
|
104 |
+
bedroom/rgb_00514.jpg bedroom/sync_depth_00514.png 518.8579
|
105 |
+
bedroom/rgb_00515.jpg bedroom/sync_depth_00515.png 518.8579
|
106 |
+
bedroom/rgb_00516.jpg bedroom/sync_depth_00516.png 518.8579
|
107 |
+
bedroom/rgb_00517.jpg bedroom/sync_depth_00517.png 518.8579
|
108 |
+
bedroom/rgb_00518.jpg bedroom/sync_depth_00518.png 518.8579
|
109 |
+
bedroom/rgb_00519.jpg bedroom/sync_depth_00519.png 518.8579
|
110 |
+
bedroom/rgb_00520.jpg bedroom/sync_depth_00520.png 518.8579
|
111 |
+
bedroom/rgb_00521.jpg bedroom/sync_depth_00521.png 518.8579
|
112 |
+
bedroom/rgb_00522.jpg bedroom/sync_depth_00522.png 518.8579
|
113 |
+
bedroom/rgb_00523.jpg bedroom/sync_depth_00523.png 518.8579
|
114 |
+
bedroom/rgb_00524.jpg bedroom/sync_depth_00524.png 518.8579
|
115 |
+
bedroom/rgb_00525.jpg bedroom/sync_depth_00525.png 518.8579
|
116 |
+
bedroom/rgb_00530.jpg bedroom/sync_depth_00530.png 518.8579
|
117 |
+
bedroom/rgb_00531.jpg bedroom/sync_depth_00531.png 518.8579
|
118 |
+
bedroom/rgb_00532.jpg bedroom/sync_depth_00532.png 518.8579
|
119 |
+
bedroom/rgb_00537.jpg bedroom/sync_depth_00537.png 518.8579
|
120 |
+
bedroom/rgb_00538.jpg bedroom/sync_depth_00538.png 518.8579
|
121 |
+
bedroom/rgb_00916.jpg bedroom/sync_depth_00916.png 518.8579
|
122 |
+
bedroom/rgb_00917.jpg bedroom/sync_depth_00917.png 518.8579
|
123 |
+
bedroom/rgb_00918.jpg bedroom/sync_depth_00918.png 518.8579
|
124 |
+
bedroom/rgb_00925.jpg bedroom/sync_depth_00925.png 518.8579
|
125 |
+
bedroom/rgb_00926.jpg bedroom/sync_depth_00926.png 518.8579
|
126 |
+
bedroom/rgb_00927.jpg bedroom/sync_depth_00927.png 518.8579
|
127 |
+
bedroom/rgb_00931.jpg bedroom/sync_depth_00931.png 518.8579
|
128 |
+
bedroom/rgb_00932.jpg bedroom/sync_depth_00932.png 518.8579
|
129 |
+
bedroom/rgb_00933.jpg bedroom/sync_depth_00933.png 518.8579
|
130 |
+
bedroom/rgb_00934.jpg bedroom/sync_depth_00934.png 518.8579
|
131 |
+
bedroom/rgb_00944.jpg bedroom/sync_depth_00944.png 518.8579
|
132 |
+
bedroom/rgb_00945.jpg bedroom/sync_depth_00945.png 518.8579
|
133 |
+
bedroom/rgb_00946.jpg bedroom/sync_depth_00946.png 518.8579
|
134 |
+
bedroom/rgb_00958.jpg bedroom/sync_depth_00958.png 518.8579
|
135 |
+
bedroom/rgb_00959.jpg bedroom/sync_depth_00959.png 518.8579
|
136 |
+
bedroom/rgb_00961.jpg bedroom/sync_depth_00961.png 518.8579
|
137 |
+
bedroom/rgb_00964.jpg bedroom/sync_depth_00964.png 518.8579
|
138 |
+
bedroom/rgb_00965.jpg bedroom/sync_depth_00965.png 518.8579
|
139 |
+
bedroom/rgb_00966.jpg bedroom/sync_depth_00966.png 518.8579
|
140 |
+
bedroom/rgb_00969.jpg bedroom/sync_depth_00969.png 518.8579
|
141 |
+
bedroom/rgb_00970.jpg bedroom/sync_depth_00970.png 518.8579
|
142 |
+
bedroom/rgb_00971.jpg bedroom/sync_depth_00971.png 518.8579
|
143 |
+
bedroom/rgb_00972.jpg bedroom/sync_depth_00972.png 518.8579
|
144 |
+
bedroom/rgb_00973.jpg bedroom/sync_depth_00973.png 518.8579
|
145 |
+
bedroom/rgb_00974.jpg bedroom/sync_depth_00974.png 518.8579
|
146 |
+
bedroom/rgb_00975.jpg bedroom/sync_depth_00975.png 518.8579
|
147 |
+
bedroom/rgb_00976.jpg bedroom/sync_depth_00976.png 518.8579
|
148 |
+
bedroom/rgb_00990.jpg bedroom/sync_depth_00990.png 518.8579
|
149 |
+
bedroom/rgb_00991.jpg bedroom/sync_depth_00991.png 518.8579
|
150 |
+
bedroom/rgb_00992.jpg bedroom/sync_depth_00992.png 518.8579
|
151 |
+
bedroom/rgb_00993.jpg bedroom/sync_depth_00993.png 518.8579
|
152 |
+
bedroom/rgb_00994.jpg bedroom/sync_depth_00994.png 518.8579
|
153 |
+
bedroom/rgb_01001.jpg bedroom/sync_depth_01001.png 518.8579
|
154 |
+
bedroom/rgb_01002.jpg bedroom/sync_depth_01002.png 518.8579
|
155 |
+
bedroom/rgb_01003.jpg bedroom/sync_depth_01003.png 518.8579
|
156 |
+
bedroom/rgb_01009.jpg bedroom/sync_depth_01009.png 518.8579
|
157 |
+
bedroom/rgb_01010.jpg bedroom/sync_depth_01010.png 518.8579
|
158 |
+
bedroom/rgb_01011.jpg bedroom/sync_depth_01011.png 518.8579
|
159 |
+
bedroom/rgb_01020.jpg bedroom/sync_depth_01020.png 518.8579
|
160 |
+
bedroom/rgb_01021.jpg bedroom/sync_depth_01021.png 518.8579
|
161 |
+
bedroom/rgb_01022.jpg bedroom/sync_depth_01022.png 518.8579
|
162 |
+
bedroom/rgb_01031.jpg bedroom/sync_depth_01031.png 518.8579
|
163 |
+
bedroom/rgb_01032.jpg bedroom/sync_depth_01032.png 518.8579
|
164 |
+
bedroom/rgb_01033.jpg bedroom/sync_depth_01033.png 518.8579
|
165 |
+
bedroom/rgb_01037.jpg bedroom/sync_depth_01037.png 518.8579
|
166 |
+
bedroom/rgb_01038.jpg bedroom/sync_depth_01038.png 518.8579
|
167 |
+
bedroom/rgb_01047.jpg bedroom/sync_depth_01047.png 518.8579
|
168 |
+
bedroom/rgb_01048.jpg bedroom/sync_depth_01048.png 518.8579
|
169 |
+
bedroom/rgb_01051.jpg bedroom/sync_depth_01051.png 518.8579
|
170 |
+
bedroom/rgb_01056.jpg bedroom/sync_depth_01056.png 518.8579
|
171 |
+
bedroom/rgb_01057.jpg bedroom/sync_depth_01057.png 518.8579
|
172 |
+
bedroom/rgb_01074.jpg bedroom/sync_depth_01074.png 518.8579
|
173 |
+
bedroom/rgb_01075.jpg bedroom/sync_depth_01075.png 518.8579
|
174 |
+
bedroom/rgb_01076.jpg bedroom/sync_depth_01076.png 518.8579
|
175 |
+
bedroom/rgb_01077.jpg bedroom/sync_depth_01077.png 518.8579
|
176 |
+
bedroom/rgb_01078.jpg bedroom/sync_depth_01078.png 518.8579
|
177 |
+
bedroom/rgb_01079.jpg bedroom/sync_depth_01079.png 518.8579
|
178 |
+
bedroom/rgb_01080.jpg bedroom/sync_depth_01080.png 518.8579
|
179 |
+
bedroom/rgb_01081.jpg bedroom/sync_depth_01081.png 518.8579
|
180 |
+
bedroom/rgb_01082.jpg bedroom/sync_depth_01082.png 518.8579
|
181 |
+
bedroom/rgb_01083.jpg bedroom/sync_depth_01083.png 518.8579
|
182 |
+
bedroom/rgb_01087.jpg bedroom/sync_depth_01087.png 518.8579
|
183 |
+
bedroom/rgb_01088.jpg bedroom/sync_depth_01088.png 518.8579
|
184 |
+
bedroom/rgb_01089.jpg bedroom/sync_depth_01089.png 518.8579
|
185 |
+
bedroom/rgb_01090.jpg bedroom/sync_depth_01090.png 518.8579
|
186 |
+
bedroom/rgb_01091.jpg bedroom/sync_depth_01091.png 518.8579
|
187 |
+
bedroom/rgb_01093.jpg bedroom/sync_depth_01093.png 518.8579
|
188 |
+
bedroom/rgb_01094.jpg bedroom/sync_depth_01094.png 518.8579
|
189 |
+
bedroom/rgb_01095.jpg bedroom/sync_depth_01095.png 518.8579
|
190 |
+
bedroom/rgb_01097.jpg bedroom/sync_depth_01097.png 518.8579
|
191 |
+
bedroom/rgb_01098.jpg bedroom/sync_depth_01098.png 518.8579
|
192 |
+
bedroom/rgb_01099.jpg bedroom/sync_depth_01099.png 518.8579
|
193 |
+
bedroom/rgb_01100.jpg bedroom/sync_depth_01100.png 518.8579
|
194 |
+
bedroom/rgb_01101.jpg bedroom/sync_depth_01101.png 518.8579
|
195 |
+
bedroom/rgb_01102.jpg bedroom/sync_depth_01102.png 518.8579
|
196 |
+
bedroom/rgb_01103.jpg bedroom/sync_depth_01103.png 518.8579
|
197 |
+
bedroom/rgb_01105.jpg bedroom/sync_depth_01105.png 518.8579
|
198 |
+
bedroom/rgb_01106.jpg bedroom/sync_depth_01106.png 518.8579
|
199 |
+
bedroom/rgb_01107.jpg bedroom/sync_depth_01107.png 518.8579
|
200 |
+
bedroom/rgb_01108.jpg bedroom/sync_depth_01108.png 518.8579
|
201 |
+
bedroom/rgb_01116.jpg bedroom/sync_depth_01116.png 518.8579
|
202 |
+
bedroom/rgb_01117.jpg bedroom/sync_depth_01117.png 518.8579
|
203 |
+
bedroom/rgb_01118.jpg bedroom/sync_depth_01118.png 518.8579
|
204 |
+
bedroom/rgb_01123.jpg bedroom/sync_depth_01123.png 518.8579
|
205 |
+
bedroom/rgb_01124.jpg bedroom/sync_depth_01124.png 518.8579
|
206 |
+
bedroom/rgb_01125.jpg bedroom/sync_depth_01125.png 518.8579
|
207 |
+
bedroom/rgb_01126.jpg bedroom/sync_depth_01126.png 518.8579
|
208 |
+
bedroom/rgb_01127.jpg bedroom/sync_depth_01127.png 518.8579
|
209 |
+
bedroom/rgb_01128.jpg bedroom/sync_depth_01128.png 518.8579
|
210 |
+
bedroom/rgb_01129.jpg bedroom/sync_depth_01129.png 518.8579
|
211 |
+
bedroom/rgb_01130.jpg bedroom/sync_depth_01130.png 518.8579
|
212 |
+
bedroom/rgb_01134.jpg bedroom/sync_depth_01134.png 518.8579
|
213 |
+
bedroom/rgb_01135.jpg bedroom/sync_depth_01135.png 518.8579
|
214 |
+
bedroom/rgb_01143.jpg bedroom/sync_depth_01143.png 518.8579
|
215 |
+
bedroom/rgb_01144.jpg bedroom/sync_depth_01144.png 518.8579
|
216 |
+
bedroom/rgb_01145.jpg bedroom/sync_depth_01145.png 518.8579
|
217 |
+
bedroom/rgb_01146.jpg bedroom/sync_depth_01146.png 518.8579
|
218 |
+
bedroom/rgb_01147.jpg bedroom/sync_depth_01147.png 518.8579
|
219 |
+
bedroom/rgb_01148.jpg bedroom/sync_depth_01148.png 518.8579
|
220 |
+
bedroom/rgb_01149.jpg bedroom/sync_depth_01149.png 518.8579
|
221 |
+
bedroom/rgb_01151.jpg bedroom/sync_depth_01151.png 518.8579
|
222 |
+
bedroom/rgb_01152.jpg bedroom/sync_depth_01152.png 518.8579
|
223 |
+
bedroom/rgb_01153.jpg bedroom/sync_depth_01153.png 518.8579
|
224 |
+
bedroom/rgb_01154.jpg bedroom/sync_depth_01154.png 518.8579
|
225 |
+
bedroom/rgb_01155.jpg bedroom/sync_depth_01155.png 518.8579
|
226 |
+
bedroom/rgb_01156.jpg bedroom/sync_depth_01156.png 518.8579
|
227 |
+
bedroom/rgb_01157.jpg bedroom/sync_depth_01157.png 518.8579
|
228 |
+
bedroom/rgb_01161.jpg bedroom/sync_depth_01161.png 518.8579
|
229 |
+
bedroom/rgb_01162.jpg bedroom/sync_depth_01162.png 518.8579
|
230 |
+
bedroom/rgb_01163.jpg bedroom/sync_depth_01163.png 518.8579
|
231 |
+
bedroom/rgb_01164.jpg bedroom/sync_depth_01164.png 518.8579
|
232 |
+
bedroom/rgb_01165.jpg bedroom/sync_depth_01165.png 518.8579
|
233 |
+
bedroom/rgb_01166.jpg bedroom/sync_depth_01166.png 518.8579
|
234 |
+
bedroom/rgb_01169.jpg bedroom/sync_depth_01169.png 518.8579
|
235 |
+
bedroom/rgb_01170.jpg bedroom/sync_depth_01170.png 518.8579
|
236 |
+
bedroom/rgb_01173.jpg bedroom/sync_depth_01173.png 518.8579
|
237 |
+
bedroom/rgb_01174.jpg bedroom/sync_depth_01174.png 518.8579
|
238 |
+
bedroom/rgb_01175.jpg bedroom/sync_depth_01175.png 518.8579
|
239 |
+
bedroom/rgb_01178.jpg bedroom/sync_depth_01178.png 518.8579
|
240 |
+
bedroom/rgb_01179.jpg bedroom/sync_depth_01179.png 518.8579
|
241 |
+
bedroom/rgb_01180.jpg bedroom/sync_depth_01180.png 518.8579
|
242 |
+
bedroom/rgb_01181.jpg bedroom/sync_depth_01181.png 518.8579
|
243 |
+
bedroom/rgb_01182.jpg bedroom/sync_depth_01182.png 518.8579
|
244 |
+
bedroom/rgb_01183.jpg bedroom/sync_depth_01183.png 518.8579
|
245 |
+
bedroom/rgb_01191.jpg bedroom/sync_depth_01191.png 518.8579
|
246 |
+
bedroom/rgb_01192.jpg bedroom/sync_depth_01192.png 518.8579
|
247 |
+
bedroom/rgb_01193.jpg bedroom/sync_depth_01193.png 518.8579
|
248 |
+
bedroom/rgb_01194.jpg bedroom/sync_depth_01194.png 518.8579
|
249 |
+
bedroom/rgb_01195.jpg bedroom/sync_depth_01195.png 518.8579
|
250 |
+
bookstore/rgb_00083.jpg bookstore/sync_depth_00083.png 518.8579
|
251 |
+
bookstore/rgb_00084.jpg bookstore/sync_depth_00084.png 518.8579
|
252 |
+
bookstore/rgb_00085.jpg bookstore/sync_depth_00085.png 518.8579
|
253 |
+
bookstore/rgb_00086.jpg bookstore/sync_depth_00086.png 518.8579
|
254 |
+
bookstore/rgb_00087.jpg bookstore/sync_depth_00087.png 518.8579
|
255 |
+
bookstore/rgb_00088.jpg bookstore/sync_depth_00088.png 518.8579
|
256 |
+
bookstore/rgb_00089.jpg bookstore/sync_depth_00089.png 518.8579
|
257 |
+
bookstore/rgb_00090.jpg bookstore/sync_depth_00090.png 518.8579
|
258 |
+
bookstore/rgb_00116.jpg bookstore/sync_depth_00116.png 518.8579
|
259 |
+
bookstore/rgb_00117.jpg bookstore/sync_depth_00117.png 518.8579
|
260 |
+
bookstore/rgb_00118.jpg bookstore/sync_depth_00118.png 518.8579
|
261 |
+
classroom/rgb_00283.jpg classroom/sync_depth_00283.png 518.8579
|
262 |
+
classroom/rgb_00284.jpg classroom/sync_depth_00284.png 518.8579
|
263 |
+
classroom/rgb_00295.jpg classroom/sync_depth_00295.png 518.8579
|
264 |
+
classroom/rgb_00296.jpg classroom/sync_depth_00296.png 518.8579
|
265 |
+
classroom/rgb_00297.jpg classroom/sync_depth_00297.png 518.8579
|
266 |
+
classroom/rgb_00298.jpg classroom/sync_depth_00298.png 518.8579
|
267 |
+
classroom/rgb_00299.jpg classroom/sync_depth_00299.png 518.8579
|
268 |
+
classroom/rgb_00300.jpg classroom/sync_depth_00300.png 518.8579
|
269 |
+
classroom/rgb_00301.jpg classroom/sync_depth_00301.png 518.8579
|
270 |
+
classroom/rgb_00309.jpg classroom/sync_depth_00309.png 518.8579
|
271 |
+
classroom/rgb_00310.jpg classroom/sync_depth_00310.png 518.8579
|
272 |
+
classroom/rgb_00311.jpg classroom/sync_depth_00311.png 518.8579
|
273 |
+
classroom/rgb_00314.jpg classroom/sync_depth_00314.png 518.8579
|
274 |
+
classroom/rgb_00315.jpg classroom/sync_depth_00315.png 518.8579
|
275 |
+
classroom/rgb_00316.jpg classroom/sync_depth_00316.png 518.8579
|
276 |
+
classroom/rgb_00324.jpg classroom/sync_depth_00324.png 518.8579
|
277 |
+
classroom/rgb_00325.jpg classroom/sync_depth_00325.png 518.8579
|
278 |
+
classroom/rgb_00326.jpg classroom/sync_depth_00326.png 518.8579
|
279 |
+
classroom/rgb_00327.jpg classroom/sync_depth_00327.png 518.8579
|
280 |
+
classroom/rgb_00328.jpg classroom/sync_depth_00328.png 518.8579
|
281 |
+
classroom/rgb_00329.jpg classroom/sync_depth_00329.png 518.8579
|
282 |
+
classroom/rgb_00330.jpg classroom/sync_depth_00330.png 518.8579
|
283 |
+
classroom/rgb_00331.jpg classroom/sync_depth_00331.png 518.8579
|
284 |
+
computer_lab/rgb_00332.jpg computer_lab/sync_depth_00332.png 518.8579
|
285 |
+
computer_lab/rgb_00333.jpg computer_lab/sync_depth_00333.png 518.8579
|
286 |
+
computer_lab/rgb_00334.jpg computer_lab/sync_depth_00334.png 518.8579
|
287 |
+
dining_room/rgb_00548.jpg dining_room/sync_depth_00548.png 518.8579
|
288 |
+
dining_room/rgb_00549.jpg dining_room/sync_depth_00549.png 518.8579
|
289 |
+
dining_room/rgb_00550.jpg dining_room/sync_depth_00550.png 518.8579
|
290 |
+
dining_room/rgb_01346.jpg dining_room/sync_depth_01346.png 518.8579
|
291 |
+
dining_room/rgb_01347.jpg dining_room/sync_depth_01347.png 518.8579
|
292 |
+
dining_room/rgb_01348.jpg dining_room/sync_depth_01348.png 518.8579
|
293 |
+
dining_room/rgb_01352.jpg dining_room/sync_depth_01352.png 518.8579
|
294 |
+
dining_room/rgb_01353.jpg dining_room/sync_depth_01353.png 518.8579
|
295 |
+
dining_room/rgb_01354.jpg dining_room/sync_depth_01354.png 518.8579
|
296 |
+
dining_room/rgb_01355.jpg dining_room/sync_depth_01355.png 518.8579
|
297 |
+
dining_room/rgb_01363.jpg dining_room/sync_depth_01363.png 518.8579
|
298 |
+
dining_room/rgb_01364.jpg dining_room/sync_depth_01364.png 518.8579
|
299 |
+
dining_room/rgb_01367.jpg dining_room/sync_depth_01367.png 518.8579
|
300 |
+
dining_room/rgb_01368.jpg dining_room/sync_depth_01368.png 518.8579
|
301 |
+
dining_room/rgb_01383.jpg dining_room/sync_depth_01383.png 518.8579
|
302 |
+
dining_room/rgb_01384.jpg dining_room/sync_depth_01384.png 518.8579
|
303 |
+
dining_room/rgb_01385.jpg dining_room/sync_depth_01385.png 518.8579
|
304 |
+
dining_room/rgb_01387.jpg dining_room/sync_depth_01387.png 518.8579
|
305 |
+
dining_room/rgb_01388.jpg dining_room/sync_depth_01388.png 518.8579
|
306 |
+
dining_room/rgb_01389.jpg dining_room/sync_depth_01389.png 518.8579
|
307 |
+
dining_room/rgb_01390.jpg dining_room/sync_depth_01390.png 518.8579
|
308 |
+
dining_room/rgb_01393.jpg dining_room/sync_depth_01393.png 518.8579
|
309 |
+
dining_room/rgb_01394.jpg dining_room/sync_depth_01394.png 518.8579
|
310 |
+
dining_room/rgb_01395.jpg dining_room/sync_depth_01395.png 518.8579
|
311 |
+
dining_room/rgb_01396.jpg dining_room/sync_depth_01396.png 518.8579
|
312 |
+
dining_room/rgb_01397.jpg dining_room/sync_depth_01397.png 518.8579
|
313 |
+
dining_room/rgb_01398.jpg dining_room/sync_depth_01398.png 518.8579
|
314 |
+
dining_room/rgb_01399.jpg dining_room/sync_depth_01399.png 518.8579
|
315 |
+
dining_room/rgb_01400.jpg dining_room/sync_depth_01400.png 518.8579
|
316 |
+
dining_room/rgb_01406.jpg dining_room/sync_depth_01406.png 518.8579
|
317 |
+
dining_room/rgb_01407.jpg dining_room/sync_depth_01407.png 518.8579
|
318 |
+
dining_room/rgb_01408.jpg dining_room/sync_depth_01408.png 518.8579
|
319 |
+
dining_room/rgb_01409.jpg dining_room/sync_depth_01409.png 518.8579
|
320 |
+
dining_room/rgb_01410.jpg dining_room/sync_depth_01410.png 518.8579
|
321 |
+
dining_room/rgb_01386.jpg dining_room/sync_depth_01386.png 518.8579
|
322 |
+
dining_room/rgb_01411.jpg dining_room/sync_depth_01411.png 518.8579
|
323 |
+
dining_room/rgb_01412.jpg dining_room/sync_depth_01412.png 518.8579
|
324 |
+
dining_room/rgb_01413.jpg dining_room/sync_depth_01413.png 518.8579
|
325 |
+
dining_room/rgb_01420.jpg dining_room/sync_depth_01420.png 518.8579
|
326 |
+
dining_room/rgb_01421.jpg dining_room/sync_depth_01421.png 518.8579
|
327 |
+
dining_room/rgb_01422.jpg dining_room/sync_depth_01422.png 518.8579
|
328 |
+
dining_room/rgb_01423.jpg dining_room/sync_depth_01423.png 518.8579
|
329 |
+
dining_room/rgb_01429.jpg dining_room/sync_depth_01429.png 518.8579
|
330 |
+
dining_room/rgb_01430.jpg dining_room/sync_depth_01430.png 518.8579
|
331 |
+
dining_room/rgb_01431.jpg dining_room/sync_depth_01431.png 518.8579
|
332 |
+
dining_room/rgb_01432.jpg dining_room/sync_depth_01432.png 518.8579
|
333 |
+
dining_room/rgb_01440.jpg dining_room/sync_depth_01440.png 518.8579
|
334 |
+
dining_room/rgb_01441.jpg dining_room/sync_depth_01441.png 518.8579
|
335 |
+
dining_room/rgb_01442.jpg dining_room/sync_depth_01442.png 518.8579
|
336 |
+
dining_room/rgb_01443.jpg dining_room/sync_depth_01443.png 518.8579
|
337 |
+
dining_room/rgb_01444.jpg dining_room/sync_depth_01444.png 518.8579
|
338 |
+
dining_room/rgb_01445.jpg dining_room/sync_depth_01445.png 518.8579
|
339 |
+
dining_room/rgb_01446.jpg dining_room/sync_depth_01446.png 518.8579
|
340 |
+
dining_room/rgb_01447.jpg dining_room/sync_depth_01447.png 518.8579
|
341 |
+
dining_room/rgb_01448.jpg dining_room/sync_depth_01448.png 518.8579
|
342 |
+
foyer/rgb_00350.jpg foyer/sync_depth_00350.png 518.8579
|
343 |
+
foyer/rgb_00351.jpg foyer/sync_depth_00351.png 518.8579
|
344 |
+
home_office/rgb_00354.jpg home_office/sync_depth_00354.png 518.8579
|
345 |
+
home_office/rgb_00355.jpg home_office/sync_depth_00355.png 518.8579
|
346 |
+
home_office/rgb_00356.jpg home_office/sync_depth_00356.png 518.8579
|
347 |
+
home_office/rgb_00357.jpg home_office/sync_depth_00357.png 518.8579
|
348 |
+
home_office/rgb_00358.jpg home_office/sync_depth_00358.png 518.8579
|
349 |
+
home_office/rgb_00359.jpg home_office/sync_depth_00359.png 518.8579
|
350 |
+
home_office/rgb_00360.jpg home_office/sync_depth_00360.png 518.8579
|
351 |
+
home_office/rgb_00361.jpg home_office/sync_depth_00361.png 518.8579
|
352 |
+
home_office/rgb_00362.jpg home_office/sync_depth_00362.png 518.8579
|
353 |
+
home_office/rgb_00363.jpg home_office/sync_depth_00363.png 518.8579
|
354 |
+
home_office/rgb_00383.jpg home_office/sync_depth_00383.png 518.8579
|
355 |
+
home_office/rgb_00384.jpg home_office/sync_depth_00384.png 518.8579
|
356 |
+
home_office/rgb_00385.jpg home_office/sync_depth_00385.png 518.8579
|
357 |
+
home_office/rgb_00386.jpg home_office/sync_depth_00386.png 518.8579
|
358 |
+
home_office/rgb_00387.jpg home_office/sync_depth_00387.png 518.8579
|
359 |
+
home_office/rgb_00388.jpg home_office/sync_depth_00388.png 518.8579
|
360 |
+
home_office/rgb_00389.jpg home_office/sync_depth_00389.png 518.8579
|
361 |
+
home_office/rgb_00394.jpg home_office/sync_depth_00394.png 518.8579
|
362 |
+
home_office/rgb_00395.jpg home_office/sync_depth_00395.png 518.8579
|
363 |
+
home_office/rgb_00396.jpg home_office/sync_depth_00396.png 518.8579
|
364 |
+
home_office/rgb_00554.jpg home_office/sync_depth_00554.png 518.8579
|
365 |
+
home_office/rgb_00555.jpg home_office/sync_depth_00555.png 518.8579
|
366 |
+
home_office/rgb_00556.jpg home_office/sync_depth_00556.png 518.8579
|
367 |
+
home_office/rgb_00557.jpg home_office/sync_depth_00557.png 518.8579
|
368 |
+
kitchen/rgb_00000.jpg kitchen/sync_depth_00000.png 518.8579
|
369 |
+
kitchen/rgb_00001.jpg kitchen/sync_depth_00001.png 518.8579
|
370 |
+
kitchen/rgb_00124.jpg kitchen/sync_depth_00124.png 518.8579
|
371 |
+
kitchen/rgb_00125.jpg kitchen/sync_depth_00125.png 518.8579
|
372 |
+
kitchen/rgb_00126.jpg kitchen/sync_depth_00126.png 518.8579
|
373 |
+
kitchen/rgb_00127.jpg kitchen/sync_depth_00127.png 518.8579
|
374 |
+
kitchen/rgb_00128.jpg kitchen/sync_depth_00128.png 518.8579
|
375 |
+
kitchen/rgb_00130.jpg kitchen/sync_depth_00130.png 518.8579
|
376 |
+
kitchen/rgb_00131.jpg kitchen/sync_depth_00131.png 518.8579
|
377 |
+
kitchen/rgb_00132.jpg kitchen/sync_depth_00132.png 518.8579
|
378 |
+
kitchen/rgb_00133.jpg kitchen/sync_depth_00133.png 518.8579
|
379 |
+
kitchen/rgb_00136.jpg kitchen/sync_depth_00136.png 518.8579
|
380 |
+
kitchen/rgb_00193.jpg kitchen/sync_depth_00193.png 518.8579
|
381 |
+
kitchen/rgb_00194.jpg kitchen/sync_depth_00194.png 518.8579
|
382 |
+
kitchen/rgb_00195.jpg kitchen/sync_depth_00195.png 518.8579
|
383 |
+
kitchen/rgb_00196.jpg kitchen/sync_depth_00196.png 518.8579
|
384 |
+
kitchen/rgb_00197.jpg kitchen/sync_depth_00197.png 518.8579
|
385 |
+
kitchen/rgb_00199.jpg kitchen/sync_depth_00199.png 518.8579
|
386 |
+
kitchen/rgb_00200.jpg kitchen/sync_depth_00200.png 518.8579
|
387 |
+
kitchen/rgb_00201.jpg kitchen/sync_depth_00201.png 518.8579
|
388 |
+
kitchen/rgb_00249.jpg kitchen/sync_depth_00249.png 518.8579
|
389 |
+
kitchen/rgb_00558.jpg kitchen/sync_depth_00558.png 518.8579
|
390 |
+
kitchen/rgb_00559.jpg kitchen/sync_depth_00559.png 518.8579
|
391 |
+
kitchen/rgb_00560.jpg kitchen/sync_depth_00560.png 518.8579
|
392 |
+
kitchen/rgb_00561.jpg kitchen/sync_depth_00561.png 518.8579
|
393 |
+
kitchen/rgb_00562.jpg kitchen/sync_depth_00562.png 518.8579
|
394 |
+
kitchen/rgb_00563.jpg kitchen/sync_depth_00563.png 518.8579
|
395 |
+
kitchen/rgb_00564.jpg kitchen/sync_depth_00564.png 518.8579
|
396 |
+
kitchen/rgb_00565.jpg kitchen/sync_depth_00565.png 518.8579
|
397 |
+
kitchen/rgb_00566.jpg kitchen/sync_depth_00566.png 518.8579
|
398 |
+
kitchen/rgb_00567.jpg kitchen/sync_depth_00567.png 518.8579
|
399 |
+
kitchen/rgb_00568.jpg kitchen/sync_depth_00568.png 518.8579
|
400 |
+
kitchen/rgb_00569.jpg kitchen/sync_depth_00569.png 518.8579
|
401 |
+
kitchen/rgb_00570.jpg kitchen/sync_depth_00570.png 518.8579
|
402 |
+
kitchen/rgb_00198.jpg kitchen/sync_depth_00198.png 518.8579
|
403 |
+
kitchen/rgb_00758.jpg kitchen/sync_depth_00758.png 518.8579
|
404 |
+
kitchen/rgb_00776.jpg kitchen/sync_depth_00776.png 518.8579
|
405 |
+
kitchen/rgb_00811.jpg kitchen/sync_depth_00811.png 518.8579
|
406 |
+
kitchen/rgb_00844.jpg kitchen/sync_depth_00844.png 518.8579
|
407 |
+
kitchen/rgb_00759.jpg kitchen/sync_depth_00759.png 518.8579
|
408 |
+
kitchen/rgb_00760.jpg kitchen/sync_depth_00760.png 518.8579
|
409 |
+
kitchen/rgb_00761.jpg kitchen/sync_depth_00761.png 518.8579
|
410 |
+
kitchen/rgb_00762.jpg kitchen/sync_depth_00762.png 518.8579
|
411 |
+
kitchen/rgb_00763.jpg kitchen/sync_depth_00763.png 518.8579
|
412 |
+
kitchen/rgb_00764.jpg kitchen/sync_depth_00764.png 518.8579
|
413 |
+
kitchen/rgb_00765.jpg kitchen/sync_depth_00765.png 518.8579
|
414 |
+
kitchen/rgb_00766.jpg kitchen/sync_depth_00766.png 518.8579
|
415 |
+
kitchen/rgb_00767.jpg kitchen/sync_depth_00767.png 518.8579
|
416 |
+
kitchen/rgb_00768.jpg kitchen/sync_depth_00768.png 518.8579
|
417 |
+
kitchen/rgb_00769.jpg kitchen/sync_depth_00769.png 518.8579
|
418 |
+
kitchen/rgb_00770.jpg kitchen/sync_depth_00770.png 518.8579
|
419 |
+
kitchen/rgb_00771.jpg kitchen/sync_depth_00771.png 518.8579
|
420 |
+
kitchen/rgb_00772.jpg kitchen/sync_depth_00772.png 518.8579
|
421 |
+
kitchen/rgb_00773.jpg kitchen/sync_depth_00773.png 518.8579
|
422 |
+
kitchen/rgb_00774.jpg kitchen/sync_depth_00774.png 518.8579
|
423 |
+
kitchen/rgb_00775.jpg kitchen/sync_depth_00775.png 518.8579
|
424 |
+
kitchen/rgb_00777.jpg kitchen/sync_depth_00777.png 518.8579
|
425 |
+
kitchen/rgb_00778.jpg kitchen/sync_depth_00778.png 518.8579
|
426 |
+
kitchen/rgb_00779.jpg kitchen/sync_depth_00779.png 518.8579
|
427 |
+
kitchen/rgb_00780.jpg kitchen/sync_depth_00780.png 518.8579
|
428 |
+
kitchen/rgb_00781.jpg kitchen/sync_depth_00781.png 518.8579
|
429 |
+
kitchen/rgb_00782.jpg kitchen/sync_depth_00782.png 518.8579
|
430 |
+
kitchen/rgb_00783.jpg kitchen/sync_depth_00783.png 518.8579
|
431 |
+
kitchen/rgb_00784.jpg kitchen/sync_depth_00784.png 518.8579
|
432 |
+
kitchen/rgb_00785.jpg kitchen/sync_depth_00785.png 518.8579
|
433 |
+
kitchen/rgb_00786.jpg kitchen/sync_depth_00786.png 518.8579
|
434 |
+
kitchen/rgb_00799.jpg kitchen/sync_depth_00799.png 518.8579
|
435 |
+
kitchen/rgb_00800.jpg kitchen/sync_depth_00800.png 518.8579
|
436 |
+
kitchen/rgb_00801.jpg kitchen/sync_depth_00801.png 518.8579
|
437 |
+
kitchen/rgb_00802.jpg kitchen/sync_depth_00802.png 518.8579
|
438 |
+
kitchen/rgb_00803.jpg kitchen/sync_depth_00803.png 518.8579
|
439 |
+
kitchen/rgb_00809.jpg kitchen/sync_depth_00809.png 518.8579
|
440 |
+
kitchen/rgb_00810.jpg kitchen/sync_depth_00810.png 518.8579
|
441 |
+
kitchen/rgb_00812.jpg kitchen/sync_depth_00812.png 518.8579
|
442 |
+
kitchen/rgb_00813.jpg kitchen/sync_depth_00813.png 518.8579
|
443 |
+
kitchen/rgb_00820.jpg kitchen/sync_depth_00820.png 518.8579
|
444 |
+
kitchen/rgb_00821.jpg kitchen/sync_depth_00821.png 518.8579
|
445 |
+
kitchen/rgb_00822.jpg kitchen/sync_depth_00822.png 518.8579
|
446 |
+
kitchen/rgb_00832.jpg kitchen/sync_depth_00832.png 518.8579
|
447 |
+
kitchen/rgb_00833.jpg kitchen/sync_depth_00833.png 518.8579
|
448 |
+
kitchen/rgb_00834.jpg kitchen/sync_depth_00834.png 518.8579
|
449 |
+
kitchen/rgb_00835.jpg kitchen/sync_depth_00835.png 518.8579
|
450 |
+
kitchen/rgb_00836.jpg kitchen/sync_depth_00836.png 518.8579
|
451 |
+
kitchen/rgb_00837.jpg kitchen/sync_depth_00837.png 518.8579
|
452 |
+
kitchen/rgb_00838.jpg kitchen/sync_depth_00838.png 518.8579
|
453 |
+
kitchen/rgb_00839.jpg kitchen/sync_depth_00839.png 518.8579
|
454 |
+
kitchen/rgb_00840.jpg kitchen/sync_depth_00840.png 518.8579
|
455 |
+
kitchen/rgb_00841.jpg kitchen/sync_depth_00841.png 518.8579
|
456 |
+
kitchen/rgb_00842.jpg kitchen/sync_depth_00842.png 518.8579
|
457 |
+
kitchen/rgb_00843.jpg kitchen/sync_depth_00843.png 518.8579
|
458 |
+
kitchen/rgb_00845.jpg kitchen/sync_depth_00845.png 518.8579
|
459 |
+
kitchen/rgb_00849.jpg kitchen/sync_depth_00849.png 518.8579
|
460 |
+
kitchen/rgb_00850.jpg kitchen/sync_depth_00850.png 518.8579
|
461 |
+
kitchen/rgb_00851.jpg kitchen/sync_depth_00851.png 518.8579
|
462 |
+
kitchen/rgb_00856.jpg kitchen/sync_depth_00856.png 518.8579
|
463 |
+
kitchen/rgb_00857.jpg kitchen/sync_depth_00857.png 518.8579
|
464 |
+
kitchen/rgb_00858.jpg kitchen/sync_depth_00858.png 518.8579
|
465 |
+
kitchen/rgb_00859.jpg kitchen/sync_depth_00859.png 518.8579
|
466 |
+
kitchen/rgb_00860.jpg kitchen/sync_depth_00860.png 518.8579
|
467 |
+
kitchen/rgb_00861.jpg kitchen/sync_depth_00861.png 518.8579
|
468 |
+
kitchen/rgb_00868.jpg kitchen/sync_depth_00868.png 518.8579
|
469 |
+
kitchen/rgb_00869.jpg kitchen/sync_depth_00869.png 518.8579
|
470 |
+
kitchen/rgb_00870.jpg kitchen/sync_depth_00870.png 518.8579
|
471 |
+
kitchen/rgb_00905.jpg kitchen/sync_depth_00905.png 518.8579
|
472 |
+
kitchen/rgb_00906.jpg kitchen/sync_depth_00906.png 518.8579
|
473 |
+
kitchen/rgb_00907.jpg kitchen/sync_depth_00907.png 518.8579
|
474 |
+
living_room/rgb_00152.jpg living_room/sync_depth_00152.png 518.8579
|
475 |
+
living_room/rgb_00153.jpg living_room/sync_depth_00153.png 518.8579
|
476 |
+
living_room/rgb_00154.jpg living_room/sync_depth_00154.png 518.8579
|
477 |
+
living_room/rgb_00166.jpg living_room/sync_depth_00166.png 518.8579
|
478 |
+
living_room/rgb_00167.jpg living_room/sync_depth_00167.png 518.8579
|
479 |
+
living_room/rgb_00168.jpg living_room/sync_depth_00168.png 518.8579
|
480 |
+
living_room/rgb_00206.jpg living_room/sync_depth_00206.png 518.8579
|
481 |
+
living_room/rgb_00207.jpg living_room/sync_depth_00207.png 518.8579
|
482 |
+
living_room/rgb_00208.jpg living_room/sync_depth_00208.png 518.8579
|
483 |
+
living_room/rgb_00209.jpg living_room/sync_depth_00209.png 518.8579
|
484 |
+
living_room/rgb_00210.jpg living_room/sync_depth_00210.png 518.8579
|
485 |
+
living_room/rgb_00211.jpg living_room/sync_depth_00211.png 518.8579
|
486 |
+
living_room/rgb_00263.jpg living_room/sync_depth_00263.png 518.8579
|
487 |
+
living_room/rgb_00578.jpg living_room/sync_depth_00578.png 518.8579
|
488 |
+
living_room/rgb_00579.jpg living_room/sync_depth_00579.png 518.8579
|
489 |
+
living_room/rgb_00580.jpg living_room/sync_depth_00580.png 518.8579
|
490 |
+
living_room/rgb_00581.jpg living_room/sync_depth_00581.png 518.8579
|
491 |
+
living_room/rgb_00590.jpg living_room/sync_depth_00590.png 518.8579
|
492 |
+
living_room/rgb_00591.jpg living_room/sync_depth_00591.png 518.8579
|
493 |
+
living_room/rgb_00592.jpg living_room/sync_depth_00592.png 518.8579
|
494 |
+
living_room/rgb_00593.jpg living_room/sync_depth_00593.png 518.8579
|
495 |
+
living_room/rgb_00602.jpg living_room/sync_depth_00602.png 518.8579
|
496 |
+
living_room/rgb_00603.jpg living_room/sync_depth_00603.png 518.8579
|
497 |
+
living_room/rgb_00604.jpg living_room/sync_depth_00604.png 518.8579
|
498 |
+
living_room/rgb_00605.jpg living_room/sync_depth_00605.png 518.8579
|
499 |
+
living_room/rgb_00606.jpg living_room/sync_depth_00606.png 518.8579
|
500 |
+
living_room/rgb_01200.jpg living_room/sync_depth_01200.png 518.8579
|
501 |
+
living_room/rgb_01201.jpg living_room/sync_depth_01201.png 518.8579
|
502 |
+
living_room/rgb_01202.jpg living_room/sync_depth_01202.png 518.8579
|
503 |
+
living_room/rgb_01203.jpg living_room/sync_depth_01203.png 518.8579
|
504 |
+
living_room/rgb_01204.jpg living_room/sync_depth_01204.png 518.8579
|
505 |
+
living_room/rgb_01205.jpg living_room/sync_depth_01205.png 518.8579
|
506 |
+
living_room/rgb_01206.jpg living_room/sync_depth_01206.png 518.8579
|
507 |
+
living_room/rgb_01207.jpg living_room/sync_depth_01207.png 518.8579
|
508 |
+
living_room/rgb_00582.jpg living_room/sync_depth_00582.png 518.8579
|
509 |
+
living_room/rgb_01208.jpg living_room/sync_depth_01208.png 518.8579
|
510 |
+
living_room/rgb_01247.jpg living_room/sync_depth_01247.png 518.8579
|
511 |
+
living_room/rgb_01277.jpg living_room/sync_depth_01277.png 518.8579
|
512 |
+
living_room/rgb_01302.jpg living_room/sync_depth_01302.png 518.8579
|
513 |
+
living_room/rgb_01209.jpg living_room/sync_depth_01209.png 518.8579
|
514 |
+
living_room/rgb_01210.jpg living_room/sync_depth_01210.png 518.8579
|
515 |
+
living_room/rgb_01211.jpg living_room/sync_depth_01211.png 518.8579
|
516 |
+
living_room/rgb_01215.jpg living_room/sync_depth_01215.png 518.8579
|
517 |
+
living_room/rgb_01216.jpg living_room/sync_depth_01216.png 518.8579
|
518 |
+
living_room/rgb_01217.jpg living_room/sync_depth_01217.png 518.8579
|
519 |
+
living_room/rgb_01218.jpg living_room/sync_depth_01218.png 518.8579
|
520 |
+
living_room/rgb_01219.jpg living_room/sync_depth_01219.png 518.8579
|
521 |
+
living_room/rgb_01225.jpg living_room/sync_depth_01225.png 518.8579
|
522 |
+
living_room/rgb_01226.jpg living_room/sync_depth_01226.png 518.8579
|
523 |
+
living_room/rgb_01227.jpg living_room/sync_depth_01227.png 518.8579
|
524 |
+
living_room/rgb_01228.jpg living_room/sync_depth_01228.png 518.8579
|
525 |
+
living_room/rgb_01229.jpg living_room/sync_depth_01229.png 518.8579
|
526 |
+
living_room/rgb_01232.jpg living_room/sync_depth_01232.png 518.8579
|
527 |
+
living_room/rgb_01233.jpg living_room/sync_depth_01233.png 518.8579
|
528 |
+
living_room/rgb_01234.jpg living_room/sync_depth_01234.png 518.8579
|
529 |
+
living_room/rgb_01246.jpg living_room/sync_depth_01246.png 518.8579
|
530 |
+
living_room/rgb_01248.jpg living_room/sync_depth_01248.png 518.8579
|
531 |
+
living_room/rgb_01249.jpg living_room/sync_depth_01249.png 518.8579
|
532 |
+
living_room/rgb_01253.jpg living_room/sync_depth_01253.png 518.8579
|
533 |
+
living_room/rgb_01254.jpg living_room/sync_depth_01254.png 518.8579
|
534 |
+
living_room/rgb_01255.jpg living_room/sync_depth_01255.png 518.8579
|
535 |
+
living_room/rgb_01256.jpg living_room/sync_depth_01256.png 518.8579
|
536 |
+
living_room/rgb_01257.jpg living_room/sync_depth_01257.png 518.8579
|
537 |
+
living_room/rgb_01258.jpg living_room/sync_depth_01258.png 518.8579
|
538 |
+
living_room/rgb_01259.jpg living_room/sync_depth_01259.png 518.8579
|
539 |
+
living_room/rgb_01260.jpg living_room/sync_depth_01260.png 518.8579
|
540 |
+
living_room/rgb_01261.jpg living_room/sync_depth_01261.png 518.8579
|
541 |
+
living_room/rgb_01262.jpg living_room/sync_depth_01262.png 518.8579
|
542 |
+
living_room/rgb_01263.jpg living_room/sync_depth_01263.png 518.8579
|
543 |
+
living_room/rgb_01264.jpg living_room/sync_depth_01264.png 518.8579
|
544 |
+
living_room/rgb_01274.jpg living_room/sync_depth_01274.png 518.8579
|
545 |
+
living_room/rgb_01275.jpg living_room/sync_depth_01275.png 518.8579
|
546 |
+
living_room/rgb_01276.jpg living_room/sync_depth_01276.png 518.8579
|
547 |
+
living_room/rgb_01278.jpg living_room/sync_depth_01278.png 518.8579
|
548 |
+
living_room/rgb_01279.jpg living_room/sync_depth_01279.png 518.8579
|
549 |
+
living_room/rgb_01284.jpg living_room/sync_depth_01284.png 518.8579
|
550 |
+
living_room/rgb_01285.jpg living_room/sync_depth_01285.png 518.8579
|
551 |
+
living_room/rgb_01286.jpg living_room/sync_depth_01286.png 518.8579
|
552 |
+
living_room/rgb_01287.jpg living_room/sync_depth_01287.png 518.8579
|
553 |
+
living_room/rgb_01288.jpg living_room/sync_depth_01288.png 518.8579
|
554 |
+
living_room/rgb_01289.jpg living_room/sync_depth_01289.png 518.8579
|
555 |
+
living_room/rgb_01290.jpg living_room/sync_depth_01290.png 518.8579
|
556 |
+
living_room/rgb_01291.jpg living_room/sync_depth_01291.png 518.8579
|
557 |
+
living_room/rgb_01292.jpg living_room/sync_depth_01292.png 518.8579
|
558 |
+
living_room/rgb_01293.jpg living_room/sync_depth_01293.png 518.8579
|
559 |
+
living_room/rgb_01294.jpg living_room/sync_depth_01294.png 518.8579
|
560 |
+
living_room/rgb_01296.jpg living_room/sync_depth_01296.png 518.8579
|
561 |
+
living_room/rgb_01297.jpg living_room/sync_depth_01297.png 518.8579
|
562 |
+
living_room/rgb_01298.jpg living_room/sync_depth_01298.png 518.8579
|
563 |
+
living_room/rgb_01301.jpg living_room/sync_depth_01301.png 518.8579
|
564 |
+
living_room/rgb_01303.jpg living_room/sync_depth_01303.png 518.8579
|
565 |
+
living_room/rgb_01304.jpg living_room/sync_depth_01304.png 518.8579
|
566 |
+
living_room/rgb_01305.jpg living_room/sync_depth_01305.png 518.8579
|
567 |
+
living_room/rgb_01306.jpg living_room/sync_depth_01306.png 518.8579
|
568 |
+
living_room/rgb_01307.jpg living_room/sync_depth_01307.png 518.8579
|
569 |
+
living_room/rgb_01313.jpg living_room/sync_depth_01313.png 518.8579
|
570 |
+
living_room/rgb_01314.jpg living_room/sync_depth_01314.png 518.8579
|
571 |
+
living_room/rgb_01328.jpg living_room/sync_depth_01328.png 518.8579
|
572 |
+
living_room/rgb_01329.jpg living_room/sync_depth_01329.png 518.8579
|
573 |
+
living_room/rgb_01330.jpg living_room/sync_depth_01330.png 518.8579
|
574 |
+
living_room/rgb_01331.jpg living_room/sync_depth_01331.png 518.8579
|
575 |
+
living_room/rgb_01334.jpg living_room/sync_depth_01334.png 518.8579
|
576 |
+
living_room/rgb_01335.jpg living_room/sync_depth_01335.png 518.8579
|
577 |
+
living_room/rgb_01336.jpg living_room/sync_depth_01336.png 518.8579
|
578 |
+
living_room/rgb_01337.jpg living_room/sync_depth_01337.png 518.8579
|
579 |
+
living_room/rgb_01338.jpg living_room/sync_depth_01338.png 518.8579
|
580 |
+
living_room/rgb_01339.jpg living_room/sync_depth_01339.png 518.8579
|
581 |
+
office/rgb_00008.jpg office/sync_depth_00008.png 518.8579
|
582 |
+
office/rgb_00013.jpg office/sync_depth_00013.png 518.8579
|
583 |
+
office/rgb_00014.jpg office/sync_depth_00014.png 518.8579
|
584 |
+
office/rgb_00015.jpg office/sync_depth_00015.png 518.8579
|
585 |
+
office/rgb_00016.jpg office/sync_depth_00016.png 518.8579
|
586 |
+
office/rgb_00017.jpg office/sync_depth_00017.png 518.8579
|
587 |
+
office/rgb_00020.jpg office/sync_depth_00020.png 518.8579
|
588 |
+
office/rgb_00027.jpg office/sync_depth_00027.png 518.8579
|
589 |
+
office/rgb_00028.jpg office/sync_depth_00028.png 518.8579
|
590 |
+
office/rgb_00029.jpg office/sync_depth_00029.png 518.8579
|
591 |
+
office/rgb_00030.jpg office/sync_depth_00030.png 518.8579
|
592 |
+
office/rgb_00031.jpg office/sync_depth_00031.png 518.8579
|
593 |
+
office/rgb_00032.jpg office/sync_depth_00032.png 518.8579
|
594 |
+
office/rgb_00033.jpg office/sync_depth_00033.png 518.8579
|
595 |
+
office/rgb_00034.jpg office/sync_depth_00034.png 518.8579
|
596 |
+
office/rgb_00035.jpg office/sync_depth_00035.png 518.8579
|
597 |
+
office/rgb_00036.jpg office/sync_depth_00036.png 518.8579
|
598 |
+
office/rgb_00038.jpg office/sync_depth_00038.png 518.8579
|
599 |
+
office/rgb_00039.jpg office/sync_depth_00039.png 518.8579
|
600 |
+
office/rgb_00040.jpg office/sync_depth_00040.png 518.8579
|
601 |
+
office/rgb_00041.jpg office/sync_depth_00041.png 518.8579
|
602 |
+
office/rgb_00042.jpg office/sync_depth_00042.png 518.8579
|
603 |
+
office/rgb_00270.jpg office/sync_depth_00270.png 518.8579
|
604 |
+
office/rgb_00271.jpg office/sync_depth_00271.png 518.8579
|
605 |
+
office/rgb_00611.jpg office/sync_depth_00611.png 518.8579
|
606 |
+
office/rgb_00612.jpg office/sync_depth_00612.png 518.8579
|
607 |
+
office/rgb_00616.jpg office/sync_depth_00616.png 518.8579
|
608 |
+
office/rgb_00617.jpg office/sync_depth_00617.png 518.8579
|
609 |
+
office/rgb_00618.jpg office/sync_depth_00618.png 518.8579
|
610 |
+
office/rgb_00619.jpg office/sync_depth_00619.png 518.8579
|
611 |
+
office/rgb_00620.jpg office/sync_depth_00620.png 518.8579
|
612 |
+
office/rgb_00632.jpg office/sync_depth_00632.png 518.8579
|
613 |
+
office/rgb_00633.jpg office/sync_depth_00633.png 518.8579
|
614 |
+
office/rgb_00634.jpg office/sync_depth_00634.png 518.8579
|
615 |
+
office/rgb_00635.jpg office/sync_depth_00635.png 518.8579
|
616 |
+
office/rgb_00636.jpg office/sync_depth_00636.png 518.8579
|
617 |
+
office/rgb_00637.jpg office/sync_depth_00637.png 518.8579
|
618 |
+
office/rgb_00037.jpg office/sync_depth_00037.png 518.8579
|
619 |
+
office_kitchen/rgb_00410.jpg office_kitchen/sync_depth_00410.png 518.8579
|
620 |
+
office_kitchen/rgb_00411.jpg office_kitchen/sync_depth_00411.png 518.8579
|
621 |
+
office_kitchen/rgb_00412.jpg office_kitchen/sync_depth_00412.png 518.8579
|
622 |
+
office_kitchen/rgb_00413.jpg office_kitchen/sync_depth_00413.png 518.8579
|
623 |
+
playroom/rgb_00429.jpg playroom/sync_depth_00429.png 518.8579
|
624 |
+
playroom/rgb_00430.jpg playroom/sync_depth_00430.png 518.8579
|
625 |
+
playroom/rgb_00431.jpg playroom/sync_depth_00431.png 518.8579
|
626 |
+
playroom/rgb_00432.jpg playroom/sync_depth_00432.png 518.8579
|
627 |
+
playroom/rgb_00433.jpg playroom/sync_depth_00433.png 518.8579
|
628 |
+
playroom/rgb_00434.jpg playroom/sync_depth_00434.png 518.8579
|
629 |
+
playroom/rgb_00440.jpg playroom/sync_depth_00440.png 518.8579
|
630 |
+
playroom/rgb_00441.jpg playroom/sync_depth_00441.png 518.8579
|
631 |
+
playroom/rgb_00442.jpg playroom/sync_depth_00442.png 518.8579
|
632 |
+
playroom/rgb_00443.jpg playroom/sync_depth_00443.png 518.8579
|
633 |
+
playroom/rgb_00444.jpg playroom/sync_depth_00444.png 518.8579
|
634 |
+
playroom/rgb_00445.jpg playroom/sync_depth_00445.png 518.8579
|
635 |
+
playroom/rgb_00446.jpg playroom/sync_depth_00446.png 518.8579
|
636 |
+
playroom/rgb_00447.jpg playroom/sync_depth_00447.png 518.8579
|
637 |
+
reception_room/rgb_00461.jpg reception_room/sync_depth_00461.png 518.8579
|
638 |
+
reception_room/rgb_00462.jpg reception_room/sync_depth_00462.png 518.8579
|
639 |
+
reception_room/rgb_00463.jpg reception_room/sync_depth_00463.png 518.8579
|
640 |
+
reception_room/rgb_00464.jpg reception_room/sync_depth_00464.png 518.8579
|
641 |
+
reception_room/rgb_00465.jpg reception_room/sync_depth_00465.png 518.8579
|
642 |
+
study/rgb_00468.jpg study/sync_depth_00468.png 518.8579
|
643 |
+
study/rgb_00469.jpg study/sync_depth_00469.png 518.8579
|
644 |
+
study/rgb_00470.jpg study/sync_depth_00470.png 518.8579
|
645 |
+
study/rgb_00471.jpg study/sync_depth_00471.png 518.8579
|
646 |
+
study/rgb_00472.jpg study/sync_depth_00472.png 518.8579
|
647 |
+
study/rgb_00473.jpg study/sync_depth_00473.png 518.8579
|
648 |
+
study/rgb_00474.jpg study/sync_depth_00474.png 518.8579
|
649 |
+
study/rgb_00475.jpg study/sync_depth_00475.png 518.8579
|
650 |
+
study/rgb_00476.jpg study/sync_depth_00476.png 518.8579
|
651 |
+
study/rgb_00643.jpg study/sync_depth_00643.png 518.8579
|
652 |
+
study/rgb_00644.jpg study/sync_depth_00644.png 518.8579
|
653 |
+
study_room/rgb_00272.jpg study_room/sync_depth_00272.png 518.8579
|
654 |
+
study_room/rgb_00278.jpg study_room/sync_depth_00278.png 518.8579
|
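Each line of the NYU Depth v2 split file above follows the same three-column layout: the relative path of an RGB frame, the relative path of its synchronized depth map, and the camera focal length in pixels (518.8579 for every entry in this file). For illustration only, a single line can be parsed as in the sketch below; parse_split_line is a hypothetical helper, not code from this repository, whose real counterpart is the dataloader in metric_depth/zoedepth/data/data_mono.py.

# Hypothetical parser for one "<rgb_path> <depth_path> <focal>" split line.
from dataclasses import dataclass


@dataclass
class SplitEntry:
    rgb_path: str    # relative path to the RGB image
    depth_path: str  # relative path to the synchronized depth PNG
    focal: float     # focal length in pixels


def parse_split_line(line: str) -> SplitEntry:
    rgb_path, depth_path, focal = line.split()
    return SplitEntry(rgb_path, depth_path, float(focal))


entry = parse_split_line(
    "bathroom/rgb_00045.jpg bathroom/sync_depth_00045.png 518.8579")
print(entry.depth_path, entry.focal)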
metric_depth/train_test_inputs/nyudepthv2_train_files_with_gt.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
metric_depth/zoedepth/data/__init__.py
ADDED
@@ -0,0 +1,24 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat
metric_depth/zoedepth/data/data_mono.py
ADDED
@@ -0,0 +1,573 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

# This file is partly inspired from BTS (https://github.com/cleinc/bts/blob/master/pytorch/bts_dataloader.py); author: Jin Han Lee

import itertools
import os
import random

import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.utils.data.distributed
from zoedepth.utils.easydict import EasyDict as edict
from PIL import Image, ImageOps
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

from zoedepth.utils.config import change_dataset

from .ddad import get_ddad_loader
from .diml_indoor_test import get_diml_indoor_loader
from .diml_outdoor_test import get_diml_outdoor_loader
from .diode import get_diode_loader
from .hypersim import get_hypersim_loader
from .ibims import get_ibims_loader
from .sun_rgbd_loader import get_sunrgbd_loader
from .vkitti import get_vkitti_loader
from .vkitti2 import get_vkitti2_loader

from .preprocess import CropParams, get_white_border, get_black_border


def _is_pil_image(img):
    return isinstance(img, Image.Image)


def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def preprocessing_transforms(mode, **kwargs):
    return transforms.Compose([
        ToTensor(mode=mode, **kwargs)
    ])


class DepthDataLoader(object):
    def __init__(self, config, mode, device='cpu', transform=None, **kwargs):
        """
        Data loader for depth datasets

        Args:
            config (dict): Config dictionary. Refer to utils/config.py
            mode (str): "train" or "online_eval"
            device (str, optional): Device to load the data on. Defaults to 'cpu'.
            transform (torchvision.transforms, optional): Transform to apply to the data. Defaults to None.
        """

        self.config = config

        if config.dataset == 'ibims':
            self.data = get_ibims_loader(config, batch_size=1, num_workers=1)
            return

        if config.dataset == 'sunrgbd':
            self.data = get_sunrgbd_loader(
                data_dir_root=config.sunrgbd_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'diml_indoor':
            self.data = get_diml_indoor_loader(
                data_dir_root=config.diml_indoor_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'diml_outdoor':
            self.data = get_diml_outdoor_loader(
                data_dir_root=config.diml_outdoor_root, batch_size=1, num_workers=1)
            return

        if "diode" in config.dataset:
            self.data = get_diode_loader(
                config[config.dataset+"_root"], batch_size=1, num_workers=1)
            return

        if config.dataset == 'hypersim_test':
            self.data = get_hypersim_loader(
                config.hypersim_test_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'vkitti':
            self.data = get_vkitti_loader(
                config.vkitti_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'vkitti2':
            self.data = get_vkitti2_loader(
                config.vkitti2_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'ddad':
            self.data = get_ddad_loader(config.ddad_root, resize_shape=(
                352, 1216), batch_size=1, num_workers=1)
            return

        img_size = self.config.get("img_size", None)
        img_size = img_size if self.config.get(
            "do_input_resize", False) else None

        if transform is None:
            transform = preprocessing_transforms(mode, size=img_size)

        if mode == 'train':

            Dataset = DataLoadPreprocess
            self.training_samples = Dataset(
                config, mode, transform=transform, device=device)

            if config.distributed:
                self.train_sampler = torch.utils.data.distributed.DistributedSampler(
                    self.training_samples)
            else:
                self.train_sampler = None

            self.data = DataLoader(self.training_samples,
                                   batch_size=config.batch_size,
                                   shuffle=(self.train_sampler is None),
                                   num_workers=config.workers,
                                   pin_memory=True,
                                   persistent_workers=True,
                                   # prefetch_factor=2,
                                   sampler=self.train_sampler)

        elif mode == 'online_eval':
            self.testing_samples = DataLoadPreprocess(
                config, mode, transform=transform)
            if config.distributed:  # redundant. here only for readability and to be more explicit
                # Give whole test set to all processes (and report evaluation only on one) regardless
                self.eval_sampler = None
            else:
                self.eval_sampler = None
            self.data = DataLoader(self.testing_samples, 1,
                                   shuffle=kwargs.get("shuffle_test", False),
                                   num_workers=1,
                                   pin_memory=False,
                                   sampler=self.eval_sampler)

        elif mode == 'test':
            self.testing_samples = DataLoadPreprocess(
                config, mode, transform=transform)
            self.data = DataLoader(self.testing_samples,
                                   1, shuffle=False, num_workers=1)

        else:
            print(
                'mode should be one of \'train, test, online_eval\'. Got {}'.format(mode))


def repetitive_roundrobin(*iterables):
    """
    cycles through iterables but sample wise
    first yield first sample from first iterable then first sample from second iterable and so on
    then second sample from first iterable then second sample from second iterable and so on

    If one iterable is shorter than the others, it is repeated until all iterables are exhausted
    repetitive_roundrobin('ABC', 'D', 'EF') --> A D E B D F C D E
    """
    # Repetitive roundrobin
    iterables_ = [iter(it) for it in iterables]
    exhausted = [False] * len(iterables)
    while not all(exhausted):
        for i, it in enumerate(iterables_):
            try:
                yield next(it)
            except StopIteration:
                exhausted[i] = True
                iterables_[i] = itertools.cycle(iterables[i])
                # First elements may get repeated if one iterable is shorter than the others
                yield next(iterables_[i])


class RepetitiveRoundRobinDataLoader(object):
    def __init__(self, *dataloaders):
        self.dataloaders = dataloaders

    def __iter__(self):
        return repetitive_roundrobin(*self.dataloaders)

    def __len__(self):
        # First samples get repeated, thats why the plus one
        return len(self.dataloaders) * (max(len(dl) for dl in self.dataloaders) + 1)


class MixedNYUKITTI(object):
    def __init__(self, config, mode, device='cpu', **kwargs):
        config = edict(config)
        config.workers = config.workers // 2
        self.config = config
        nyu_conf = change_dataset(edict(config), 'nyu')
        kitti_conf = change_dataset(edict(config), 'kitti')

        # make nyu default for testing
        self.config = config = nyu_conf
        img_size = self.config.get("img_size", None)
        img_size = img_size if self.config.get(
            "do_input_resize", False) else None
        if mode == 'train':
            nyu_loader = DepthDataLoader(
                nyu_conf, mode, device=device, transform=preprocessing_transforms(mode, size=img_size)).data
            kitti_loader = DepthDataLoader(
                kitti_conf, mode, device=device, transform=preprocessing_transforms(mode, size=img_size)).data
            # It has been changed to repetitive roundrobin
            self.data = RepetitiveRoundRobinDataLoader(
                nyu_loader, kitti_loader)
        else:
            self.data = DepthDataLoader(nyu_conf, mode, device=device).data


def remove_leading_slash(s):
    if s[0] == '/' or s[0] == '\\':
        return s[1:]
    return s


class CachedReader:
    def __init__(self, shared_dict=None):
        if shared_dict:
            self._cache = shared_dict
        else:
            self._cache = {}

    def open(self, fpath):
        im = self._cache.get(fpath, None)
        if im is None:
            im = self._cache[fpath] = Image.open(fpath)
        return im


class ImReader:
    def __init__(self):
        pass

    # @cache
    def open(self, fpath):
        return Image.open(fpath)


class DataLoadPreprocess(Dataset):
    def __init__(self, config, mode, transform=None, is_for_online_eval=False, **kwargs):
        self.config = config
        if mode == 'online_eval':
            with open(config.filenames_file_eval, 'r') as f:
                self.filenames = f.readlines()
        else:
            with open(config.filenames_file, 'r') as f:
                self.filenames = f.readlines()

        self.mode = mode
        self.transform = transform
        self.to_tensor = ToTensor(mode)
        self.is_for_online_eval = is_for_online_eval
        if config.use_shared_dict:
            self.reader = CachedReader(config.shared_dict)
        else:
            self.reader = ImReader()

    def postprocess(self, sample):
        return sample

    def __getitem__(self, idx):
        sample_path = self.filenames[idx]
        focal = float(sample_path.split()[2])
        sample = {}

        if self.mode == 'train':
            if self.config.dataset == 'kitti' and self.config.use_right and random.random() > 0.5:
                image_path = os.path.join(
                    self.config.data_path, remove_leading_slash(sample_path.split()[3]))
                depth_path = os.path.join(
                    self.config.gt_path, remove_leading_slash(sample_path.split()[4]))
            else:
                image_path = os.path.join(
                    self.config.data_path, remove_leading_slash(sample_path.split()[0]))
                depth_path = os.path.join(
                    self.config.gt_path, remove_leading_slash(sample_path.split()[1]))

            image = self.reader.open(image_path)
            depth_gt = self.reader.open(depth_path)
            w, h = image.size

            if self.config.do_kb_crop:
                height = image.height
                width = image.width
                top_margin = int(height - 352)
                left_margin = int((width - 1216) / 2)
                depth_gt = depth_gt.crop(
                    (left_margin, top_margin, left_margin + 1216, top_margin + 352))
                image = image.crop(
                    (left_margin, top_margin, left_margin + 1216, top_margin + 352))

            # Avoid blank boundaries due to pixel registration?
            # Train images have white border. Test images have black border.
            if self.config.dataset == 'nyu' and self.config.avoid_boundary:
                # print("Avoiding Blank Boundaries!")
                # We just crop and pad again with reflect padding to original size
                # original_size = image.size
                crop_params = get_white_border(np.array(image, dtype=np.uint8))
                image = image.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom))
                depth_gt = depth_gt.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom))

                # Use reflect padding to fill the blank
                image = np.array(image)
                image = np.pad(image, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right), (0, 0)), mode='reflect')
                image = Image.fromarray(image)

                depth_gt = np.array(depth_gt)
                depth_gt = np.pad(depth_gt, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right)), 'constant', constant_values=0)
                depth_gt = Image.fromarray(depth_gt)

            if self.config.do_random_rotate and (self.config.aug):
                random_angle = (random.random() - 0.5) * 2 * self.config.degree
                image = self.rotate_image(image, random_angle)
                depth_gt = self.rotate_image(
                    depth_gt, random_angle, flag=Image.NEAREST)

            image = np.asarray(image, dtype=np.float32) / 255.0
            depth_gt = np.asarray(depth_gt, dtype=np.float32)
            depth_gt = np.expand_dims(depth_gt, axis=2)

            if self.config.dataset == 'nyu':
                depth_gt = depth_gt / 1000.0
            else:
                depth_gt = depth_gt / 256.0

            if self.config.aug and (self.config.random_crop):
                image, depth_gt = self.random_crop(
                    image, depth_gt, self.config.input_height, self.config.input_width)

            if self.config.aug and self.config.random_translate:
                # print("Random Translation!")
                image, depth_gt = self.random_translate(image, depth_gt, self.config.max_translation)

            image, depth_gt = self.train_preprocess(image, depth_gt)
            mask = np.logical_and(depth_gt > self.config.min_depth,
                                  depth_gt < self.config.max_depth).squeeze()[None, ...]
            sample = {'image': image, 'depth': depth_gt, 'focal': focal,
                      'mask': mask, **sample}

        else:
            if self.mode == 'online_eval':
                data_path = self.config.data_path_eval
            else:
                data_path = self.config.data_path

            image_path = os.path.join(
                data_path, remove_leading_slash(sample_path.split()[0]))
            image = np.asarray(self.reader.open(image_path),
                               dtype=np.float32) / 255.0

            if self.mode == 'online_eval':
                gt_path = self.config.gt_path_eval
                depth_path = os.path.join(
                    gt_path, remove_leading_slash(sample_path.split()[1]))
                has_valid_depth = False
                try:
                    depth_gt = self.reader.open(depth_path)
                    has_valid_depth = True
                except IOError:
                    depth_gt = False
                    # print('Missing gt for {}'.format(image_path))

                if has_valid_depth:
                    depth_gt = np.asarray(depth_gt, dtype=np.float32)
                    depth_gt = np.expand_dims(depth_gt, axis=2)
                    if self.config.dataset == 'nyu':
                        depth_gt = depth_gt / 1000.0
                    else:
                        depth_gt = depth_gt / 256.0

                    mask = np.logical_and(
                        depth_gt >= self.config.min_depth, depth_gt <= self.config.max_depth).squeeze()[None, ...]
                else:
                    mask = False

            if self.config.do_kb_crop:
                height = image.shape[0]
                width = image.shape[1]
                top_margin = int(height - 352)
                left_margin = int((width - 1216) / 2)
                image = image[top_margin:top_margin + 352,
                              left_margin:left_margin + 1216, :]
                if self.mode == 'online_eval' and has_valid_depth:
                    depth_gt = depth_gt[top_margin:top_margin +
                                        352, left_margin:left_margin + 1216, :]

            if self.mode == 'online_eval':
                sample = {'image': image, 'depth': depth_gt, 'focal': focal, 'has_valid_depth': has_valid_depth,
                          'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1],
                          'mask': mask}
            else:
                sample = {'image': image, 'focal': focal}

        if (self.mode == 'train') or ('has_valid_depth' in sample and sample['has_valid_depth']):
            mask = np.logical_and(depth_gt > self.config.min_depth,
                                  depth_gt < self.config.max_depth).squeeze()[None, ...]
            sample['mask'] = mask

        if self.transform:
            sample = self.transform(sample)

        sample = self.postprocess(sample)
        sample['dataset'] = self.config.dataset
        sample = {**sample, 'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1]}

        return sample

    def rotate_image(self, image, angle, flag=Image.BILINEAR):
        result = image.rotate(angle, resample=flag)
        return result

    def random_crop(self, img, depth, height, width):
        assert img.shape[0] >= height
        assert img.shape[1] >= width
        assert img.shape[0] == depth.shape[0]
        assert img.shape[1] == depth.shape[1]
        x = random.randint(0, img.shape[1] - width)
        y = random.randint(0, img.shape[0] - height)
        img = img[y:y + height, x:x + width, :]
        depth = depth[y:y + height, x:x + width, :]

        return img, depth

    def random_translate(self, img, depth, max_t=20):
        assert img.shape[0] == depth.shape[0]
        assert img.shape[1] == depth.shape[1]
        p = self.config.translate_prob
        do_translate = random.random()
        if do_translate > p:
            return img, depth
        x = random.randint(-max_t, max_t)
        y = random.randint(-max_t, max_t)
        M = np.float32([[1, 0, x], [0, 1, y]])
        # print(img.shape, depth.shape)
        img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
        depth = cv2.warpAffine(depth, M, (depth.shape[1], depth.shape[0]))
        depth = depth.squeeze()[..., None]  # add channel dim back. Affine warp removes it
        # print("after", img.shape, depth.shape)
        return img, depth

    def train_preprocess(self, image, depth_gt):
        if self.config.aug:
            # Random flipping
            do_flip = random.random()
            if do_flip > 0.5:
                image = (image[:, ::-1, :]).copy()
                depth_gt = (depth_gt[:, ::-1, :]).copy()

            # Random gamma, brightness, color augmentation
            do_augment = random.random()
            if do_augment > 0.5:
                image = self.augment_image(image)

        return image, depth_gt

    def augment_image(self, image):
        # gamma augmentation
        gamma = random.uniform(0.9, 1.1)
        image_aug = image ** gamma

        # brightness augmentation
        if self.config.dataset == 'nyu':
            brightness = random.uniform(0.75, 1.25)
        else:
            brightness = random.uniform(0.9, 1.1)
        image_aug = image_aug * brightness

        # color augmentation
        colors = np.random.uniform(0.9, 1.1, size=3)
        white = np.ones((image.shape[0], image.shape[1]))
        color_image = np.stack([white * colors[i] for i in range(3)], axis=2)
        image_aug *= color_image
        image_aug = np.clip(image_aug, 0, 1)

        return image_aug

    def __len__(self):
        return len(self.filenames)


class ToTensor(object):
    def __init__(self, mode, do_normalize=False, size=None):
        self.mode = mode
        self.normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if do_normalize else nn.Identity()
        self.size = size
        if size is not None:
            self.resize = transforms.Resize(size=size)
        else:
            self.resize = nn.Identity()

    def __call__(self, sample):
        image, focal = sample['image'], sample['focal']
        image = self.to_tensor(image)
        image = self.normalize(image)
        image = self.resize(image)

        if self.mode == 'test':
            return {'image': image, 'focal': focal}

        depth = sample['depth']
        if self.mode == 'train':
            depth = self.to_tensor(depth)
            return {**sample, 'image': image, 'depth': depth, 'focal': focal}
        else:
            has_valid_depth = sample['has_valid_depth']
            image = self.resize(image)
            return {**sample, 'image': image, 'depth': depth, 'focal': focal, 'has_valid_depth': has_valid_depth,
                    'image_path': sample['image_path'], 'depth_path': sample['depth_path']}

    def to_tensor(self, pic):
        if not (_is_pil_image(pic) or _is_numpy_image(pic)):
            raise TypeError(
                'pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
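The mixed NYU/KITTI training path above hinges on `repetitive_roundrobin`, which interleaves two dataloaders sample by sample and keeps cycling the shorter one until the longest is exhausted. Below is a minimal standalone sketch of that behaviour; the function body is copied from the file above and plain strings stand in for the dataloaders, so nothing here depends on the rest of the repo.

```python
# Toy check of the round-robin mixing used by RepetitiveRoundRobinDataLoader.
# Strings stand in for the NYU and KITTI dataloaders.
import itertools

def repetitive_roundrobin(*iterables):
    iterables_ = [iter(it) for it in iterables]
    exhausted = [False] * len(iterables)
    while not all(exhausted):
        for i, it in enumerate(iterables_):
            try:
                yield next(it)
            except StopIteration:
                # restart the exhausted iterable and keep serving from it
                exhausted[i] = True
                iterables_[i] = itertools.cycle(iterables[i])
                yield next(iterables_[i])

print(list(repetitive_roundrobin('ABC', 'D', 'EF')))
# -> ['A', 'D', 'E', 'B', 'D', 'F', 'C', 'D', 'E', 'A', 'D', 'F']
```

Iteration only stops once every input has raised StopIteration at least once, so the longest input (length `max_len`) is cycled through `max_len + 1` rounds of `len(dataloaders)` yields each; that is exactly what `RepetitiveRoundRobinDataLoader.__len__` reports (here 3 * (3 + 1) = 12).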
metric_depth/zoedepth/data/ddad.py
ADDED
@@ -0,0 +1,125 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self, resize_shape):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x : x
        self.resize = transforms.Resize(resize_shape)

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "ddad"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()

        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class DDAD(Dataset):
    def __init__(self, data_dir_root, resize_shape):
        import glob

        # image paths are of the form <data_dir_root>/{outleft, depthmap}/*.png

        # self.image_files = glob.glob(os.path.join(data_dir_root, '*.png'))
        # self.depth_files = [r.replace("_rgb.png", "_depth.npy")
        #                     for r in self.image_files]
        self.image_files, self.depth_files = [], []
        with open('/mnt/bn/liheyang/MTL-SA-1B/dataset/splits/ddad/val.txt', 'r') as f:
            lines = f.read().splitlines()
            for line in lines:
                self.image_files.append(line.split(' ')[0])
                self.depth_files.append(line.split(' ')[1])

        self.transform = ToTensor(resize_shape)

    def __getitem__(self, idx):

        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.load(depth_path)  # meters

        # depth[depth > 8] = -1
        depth = depth[..., None]

        sample = dict(image=image, depth=depth)
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs):
    dataset = DDAD(data_dir_root, resize_shape)
    return DataLoader(dataset, batch_size, **kwargs)
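For orientation, here is a sketch of how these single-purpose evaluation loaders are typically consumed. The `datasets/ddad` root is a placeholder, `zoedepth` is assumed to be importable from `metric_depth/`, and note that `DDAD.__init__` currently reads its split from a hard-coded `val.txt` path, so this is illustrative rather than runnable as-is.

```python
# Illustrative only: iterate the DDAD evaluation loader defined above.
# Paths are placeholders; see the hard-coded split file in DDAD.__init__.
from zoedepth.data.ddad import get_ddad_loader

loader = get_ddad_loader("datasets/ddad", resize_shape=(352, 1216),
                         batch_size=1, num_workers=1)
for sample in loader:
    image = sample["image"]   # (1, 3, 352, 1216) tensor, RGB scaled to [0, 1]
    depth = sample["depth"]   # (1, 1, H, W) tensor, metric depth in meters
    break
```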
metric_depth/zoedepth/data/diml_indoor_test.py
ADDED
@@ -0,0 +1,125 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x : x
        self.resize = transforms.Resize((480, 640))

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "diml_indoor"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class DIML_Indoor(Dataset):
    def __init__(self, data_dir_root):
        import glob

        # image paths are of the form <data_dir_root>/{HR, LR}/<scene>/{color, depth_filled}/*.png
        self.image_files = glob.glob(os.path.join(
            data_dir_root, "LR", '*', 'color', '*.png'))
        self.depth_files = [r.replace("color", "depth_filled").replace(
            "_c.png", "_depth_filled.png") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.asarray(Image.open(depth_path),
                           dtype='uint16') / 1000.0  # mm to meters

        # print(np.shape(image))
        # print(np.shape(depth))

        # depth[depth > 8] = -1
        depth = depth[..., None]

        sample = dict(image=image, depth=depth)

        # return sample
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = DIML_Indoor(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)

# get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR")
# get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR")
metric_depth/zoedepth/data/diml_outdoor_test.py
ADDED
@@ -0,0 +1,114 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x : x

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class DIML_Outdoor(Dataset):
    def __init__(self, data_dir_root):
        import glob

        # image paths are of the form <data_dir_root>/{outleft, depthmap}/*.png
        self.image_files = glob.glob(os.path.join(
            data_dir_root, 'outleft', '*.png'))
        self.depth_files = [r.replace("outleft", "depthmap")
                            for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.asarray(Image.open(depth_path),
                           dtype='uint16') / 1000.0  # mm to meters

        # depth[depth > 8] = -1
        depth = depth[..., None]

        sample = dict(image=image, depth=depth, dataset="diml_outdoor")

        # return sample
        return self.transform(sample)

    def __len__(self):
        return len(self.image_files)


def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = DIML_Outdoor(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)

# get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR")
# get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR")
metric_depth/zoedepth/data/diode.py
ADDED
@@ -0,0 +1,125 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x : x
        self.resize = transforms.Resize(480)

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "diode"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()

        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class DIODE(Dataset):
    def __init__(self, data_dir_root):
        import glob

        # image paths are of the form <data_dir_root>/scene_#/scan_#/*.png
        self.image_files = glob.glob(
            os.path.join(data_dir_root, '*', '*', '*.png'))
        self.depth_files = [r.replace(".png", "_depth.npy")
                            for r in self.image_files]
        self.depth_mask_files = [
            r.replace(".png", "_depth_mask.npy") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]
        depth_mask_path = self.depth_mask_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        depth = np.load(depth_path)  # in meters
        valid = np.load(depth_mask_path)  # binary

        # depth[depth > 8] = -1
        # depth = depth[..., None]

        sample = dict(image=image, depth=depth, valid=valid)

        # return sample
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_diode_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = DIODE(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)

# get_diode_loader(data_dir_root="datasets/diode/val/outdoor")
metric_depth/zoedepth/data/hypersim.py
ADDED
@@ -0,0 +1,138 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import glob
import os

import h5py
import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


def hypersim_distance_to_depth(npyDistance):
    intWidth, intHeight, fltFocal = 1024, 768, 886.81

    npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape(
        1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None]
    npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5,
                                 intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None]
    npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32)
    npyImageplane = np.concatenate(
        [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2)

    npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal
    return npyDepth


class ToTensor(object):
    def __init__(self):
        # self.normalize = transforms.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x: x
        self.resize = transforms.Resize((480, 640))

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "hypersim"}

    def to_tensor(self, pic):

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class HyperSim(Dataset):
    def __init__(self, data_dir_root):
        # image paths are of the form <data_dir_root>/<scene>/images/scene_cam_#_final_preview/*.tonemap.jpg
        # depth paths are of the form <data_dir_root>/<scene>/images/scene_cam_#_final_preview/*.depth_meters.hdf5
        self.image_files = glob.glob(os.path.join(
            data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg'))
        self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace(
            ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0

        # depth from hdf5
        depth_fd = h5py.File(depth_path, "r")
        # in meters (Euclidean distance)
        distance_meters = np.array(depth_fd['dataset'])
        depth = hypersim_distance_to_depth(
            distance_meters)  # in meters (planar depth)

        # depth[depth > 8] = -1
        depth = depth[..., None]

        sample = dict(image=image, depth=depth)
        sample = self.transform(sample)

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)


def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs):
    dataset = HyperSim(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)
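`hypersim_distance_to_depth` above converts the Euclidean ray distance stored in Hypersim's HDF5 files into planar depth by scaling each pixel with focal / ||(x, y, focal)||. A small self-contained numeric check of that formula follows; the constant 1 m distance image is synthetic and the constants mirror the ones in the function.

```python
# Standalone check of the distance -> planar-depth conversion above.
import numpy as np

W, H, F = 1024, 768, 886.81                       # same constants as hypersim.py
dist = np.ones((H, W), dtype=np.float32)          # synthetic: 1 m ray distance everywhere

x = np.linspace(-0.5 * W + 0.5, 0.5 * W - 0.5, W, dtype=np.float32)
y = np.linspace(-0.5 * H + 0.5, 0.5 * H - 0.5, H, dtype=np.float32)
xx, yy = np.meshgrid(x, y)                        # per-pixel image-plane coordinates
depth = dist * F / np.sqrt(xx ** 2 + yy ** 2 + F ** 2)

print(round(float(depth[H // 2, W // 2]), 3))     # ~1.0 at the principal point
print(round(float(depth[0, 0]), 3))               # ~0.811 at a corner (tilted ray)
```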
metric_depth/zoedepth/data/ibims.py
ADDED
@@ -0,0 +1,81 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms as T


class iBims(Dataset):
    def __init__(self, config):
        root_folder = config.ibims_root
        with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f:
            imglist = f.read().split()

        samples = []
        for basename in imglist:
            img_path = os.path.join(root_folder, 'rgb', basename + ".png")
            depth_path = os.path.join(root_folder, 'depth', basename + ".png")
            valid_mask_path = os.path.join(
                root_folder, 'mask_invalid', basename+".png")
            transp_mask_path = os.path.join(
                root_folder, 'mask_transp', basename+".png")

            samples.append(
                (img_path, depth_path, valid_mask_path, transp_mask_path))

        self.samples = samples
        # self.normalize = T.Normalize(
        #     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.normalize = lambda x : x

    def __getitem__(self, idx):
        img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx]

        img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0
        depth = np.asarray(Image.open(depth_path),
                           dtype=np.uint16).astype('float')*50.0/65535

        mask_valid = np.asarray(Image.open(valid_mask_path))
        mask_transp = np.asarray(Image.open(transp_mask_path))

        # depth = depth * mask_valid * mask_transp
        depth = np.where(mask_valid * mask_transp, depth, -1)

        img = torch.from_numpy(img).permute(2, 0, 1)
        img = self.normalize(img)
        depth = torch.from_numpy(depth).unsqueeze(0)
        return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims')

    def __len__(self):
        return len(self.samples)


def get_ibims_loader(config, batch_size=1, **kwargs):
    dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs)
    return dataloader
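The ground-truth decoding in `iBims.__getitem__` above is easy to miss: the 16-bit depth PNGs are rescaled to meters with `* 50.0 / 65535` (so the full 16-bit range maps to 0-50 m), and pixels that are invalid or belong to transparent surfaces are flagged with -1 so they can be excluded from evaluation. A tiny sketch with made-up values:

```python
# Illustrative values only; mirrors the decoding in iBims.__getitem__ above.
import numpy as np

png_u16 = np.array([[0, 32768, 65535]], dtype=np.uint16)
depth_m = png_u16.astype('float') * 50.0 / 65535   # 16-bit code -> meters (0..50 m)

mask_valid = np.array([[1, 1, 0]])                 # 0 marks invalid pixels
mask_transp = np.array([[1, 0, 1]])                # 0 marks transparent surfaces
depth_m = np.where(mask_valid * mask_transp, depth_m, -1)
print(depth_m)                                     # [[ 0. -1. -1.]]
```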
metric_depth/zoedepth/data/preprocess.py
ADDED
@@ -0,0 +1,154 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import numpy as np
|
26 |
+
from dataclasses import dataclass
|
27 |
+
from typing import Tuple, List
|
28 |
+
|
29 |
+
# dataclass to store the crop parameters
|
30 |
+
@dataclass
|
31 |
+
class CropParams:
|
32 |
+
top: int
|
33 |
+
bottom: int
|
34 |
+
left: int
|
35 |
+
right: int
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams:
|
40 |
+
gray_image = np.mean(rgb_image, axis=channel_axis)
|
41 |
+
h, w = gray_image.shape
|
42 |
+
|
43 |
+
|
44 |
+
def num_value_pixels(arr):
|
45 |
+
return np.sum(np.abs(arr - value) < level_diff_threshold)
|
46 |
+
|
47 |
+
def is_above_tolerance(arr, total_pixels):
|
48 |
+
return (num_value_pixels(arr) / total_pixels) > tolerance
|
49 |
+
|
50 |
+
# Crop top border until the fraction of value pixels drops below tolerance
|
51 |
+
top = min_border
|
52 |
+
while is_above_tolerance(gray_image[top, :], w) and top < h-1:
|
53 |
+
top += 1
|
54 |
+
if top > cut_off:
|
55 |
+
break
|
56 |
+
|
57 |
+
# Crop bottom border until the fraction of value pixels drops below tolerance
|
58 |
+
bottom = h - min_border
|
59 |
+
while is_above_tolerance(gray_image[bottom, :], w) and bottom > 0:
|
60 |
+
bottom -= 1
|
61 |
+
if h - bottom > cut_off:
|
62 |
+
break
|
63 |
+
|
64 |
+
# Crop left border until the fraction of value pixels drops below tolerance
|
65 |
+
left = min_border
|
66 |
+
while is_above_tolerance(gray_image[:, left], h) and left < w-1:
|
67 |
+
left += 1
|
68 |
+
if left > cut_off:
|
69 |
+
break
|
70 |
+
|
71 |
+
# Crop right border until the fraction of value pixels drops below tolerance
|
72 |
+
right = w - min_border
|
73 |
+
while is_above_tolerance(gray_image[:, right], h) and right > 0:
|
74 |
+
right -= 1
|
75 |
+
if w - right > cut_off:
|
76 |
+
break
|
77 |
+
|
78 |
+
|
79 |
+
return CropParams(top, bottom, left, right)
|
80 |
+
|
81 |
+
|
82 |
+
def get_white_border(rgb_image, value=255, **kwargs) -> CropParams:
|
83 |
+
"""Crops the white border of the RGB.
|
84 |
+
|
85 |
+
Args:
|
86 |
+
rgb: RGB image, shape (H, W, 3).
|
87 |
+
Returns:
|
88 |
+
Crop parameters.
|
89 |
+
"""
|
90 |
+
if value == 255:
|
91 |
+
# assert range of values in rgb image is [0, 255]
|
92 |
+
assert np.max(rgb_image) <= 255 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 255]."
|
93 |
+
assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]."
|
94 |
+
elif value == 1:
|
95 |
+
# assert range of values in rgb image is [0, 1]
|
96 |
+
assert np.max(rgb_image) <= 1 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 1]."
|
97 |
+
|
98 |
+
return get_border_params(rgb_image, value=value, **kwargs)
|
99 |
+
|
100 |
+
def get_black_border(rgb_image, **kwargs) -> CropParams:
|
101 |
+
"""Crops the black border of the RGB.
|
102 |
+
|
103 |
+
Args:
|
104 |
+
rgb: RGB image, shape (H, W, 3).
|
105 |
+
|
106 |
+
Returns:
|
107 |
+
Crop parameters.
|
108 |
+
"""
|
109 |
+
|
110 |
+
return get_border_params(rgb_image, value=0, **kwargs)
|
111 |
+
|
112 |
+
def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray:
|
113 |
+
"""Crops the image according to the crop parameters.
|
114 |
+
|
115 |
+
Args:
|
116 |
+
image: RGB or depth image, shape (H, W, 3) or (H, W).
|
117 |
+
crop_params: Crop parameters.
|
118 |
+
|
119 |
+
Returns:
|
120 |
+
Cropped image.
|
121 |
+
"""
|
122 |
+
return image[crop_params.top:crop_params.bottom, crop_params.left:crop_params.right]
|
123 |
+
|
124 |
+
def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray]:
|
125 |
+
"""Crops the images according to the crop parameters.
|
126 |
+
|
127 |
+
Args:
|
128 |
+
images: RGB or depth images, shape (H, W, 3) or (H, W).
|
129 |
+
crop_params: Crop parameters.
|
130 |
+
|
131 |
+
Returns:
|
132 |
+
Cropped images.
|
133 |
+
"""
|
134 |
+
return tuple(crop_image(image, crop_params) for image in images)
|
135 |
+
|
136 |
+
def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray]:
|
137 |
+
"""Crops the white and black border of the RGB and depth images.
|
138 |
+
|
139 |
+
Args:
|
140 |
+
rgb: RGB image, shape (H, W, 3). This image is used to determine the border.
|
141 |
+
other_images: The other images to crop according to the border of the RGB image.
|
142 |
+
Returns:
|
143 |
+
Cropped RGB and other images.
|
144 |
+
"""
|
145 |
+
# crop black border
|
146 |
+
crop_params = get_black_border(rgb_image, tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)
|
147 |
+
cropped_images = crop_images(rgb_image, *other_images, crop_params=crop_params)
|
148 |
+
|
149 |
+
# crop white border
|
150 |
+
crop_params = get_white_border(cropped_images[0], tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)
|
151 |
+
cropped_images = crop_images(*cropped_images, crop_params=crop_params)
|
152 |
+
|
153 |
+
return cropped_images
|
154 |
+
|
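The helpers in preprocess.py are plain NumPy functions, so they can be tried without any dataset on disk. Below is a minimal sketch, assuming metric_depth/ is on PYTHONPATH so that zoedepth.data.preprocess is importable; the synthetic frame and its border width are illustrative only.
import numpy as np
from zoedepth.data.preprocess import crop_black_or_white_border, get_black_border
# Synthetic 100x100 RGB frame with a black border, plus a matching depth map.
rgb = np.zeros((100, 100, 3), dtype=np.uint8)
rgb[15:85, 15:85] = 128
depth = np.random.rand(100, 100).astype(np.float32)
# Estimate the black border of the RGB frame only.
params = get_black_border(rgb)  # CropParams(top, bottom, left, right)
# Crop the RGB frame and the depth map with the same parameters:
# black border first, then any white border of the intermediate result.
rgb_cropped, depth_cropped = crop_black_or_white_border(rgb, depth)
print(params, rgb_cropped.shape, depth_cropped.shape)
The exact crop extents depend on tolerance, cut_off and min_border, so the printed shapes are only indicative.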
metric_depth/zoedepth/data/sun_rgbd_loader.py
ADDED
@@ -0,0 +1,115 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import os
|
26 |
+
|
27 |
+
import numpy as np
|
28 |
+
import torch
|
29 |
+
from PIL import Image
|
30 |
+
from torch.utils.data import DataLoader, Dataset
|
31 |
+
from torchvision import transforms
|
32 |
+
|
33 |
+
|
34 |
+
class ToTensor(object):
|
35 |
+
def __init__(self):
|
36 |
+
# self.normalize = transforms.Normalize(
|
37 |
+
# mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
38 |
+
self.normalize = lambda x: x
|
39 |
+
|
40 |
+
def __call__(self, sample):
|
41 |
+
image, depth = sample['image'], sample['depth']
|
42 |
+
image = self.to_tensor(image)
|
43 |
+
image = self.normalize(image)
|
44 |
+
depth = self.to_tensor(depth)
|
45 |
+
|
46 |
+
return {'image': image, 'depth': depth, 'dataset': "sunrgbd"}
|
47 |
+
|
48 |
+
def to_tensor(self, pic):
|
49 |
+
|
50 |
+
if isinstance(pic, np.ndarray):
|
51 |
+
img = torch.from_numpy(pic.transpose((2, 0, 1)))
|
52 |
+
return img
|
53 |
+
|
54 |
+
# # handle PIL Image
|
55 |
+
if pic.mode == 'I':
|
56 |
+
img = torch.from_numpy(np.array(pic, np.int32, copy=False))
|
57 |
+
elif pic.mode == 'I;16':
|
58 |
+
img = torch.from_numpy(np.array(pic, np.int16, copy=False))
|
59 |
+
else:
|
60 |
+
img = torch.ByteTensor(
|
61 |
+
torch.ByteStorage.from_buffer(pic.tobytes()))
|
62 |
+
# PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
|
63 |
+
if pic.mode == 'YCbCr':
|
64 |
+
nchannel = 3
|
65 |
+
elif pic.mode == 'I;16':
|
66 |
+
nchannel = 1
|
67 |
+
else:
|
68 |
+
nchannel = len(pic.mode)
|
69 |
+
img = img.view(pic.size[1], pic.size[0], nchannel)
|
70 |
+
|
71 |
+
img = img.transpose(0, 1).transpose(0, 2).contiguous()
|
72 |
+
if isinstance(img, torch.ByteTensor):
|
73 |
+
return img.float()
|
74 |
+
else:
|
75 |
+
return img
|
76 |
+
|
77 |
+
|
78 |
+
class SunRGBD(Dataset):
|
79 |
+
def __init__(self, data_dir_root):
|
80 |
+
# test_file_dirs = loadmat(train_test_file)['alltest'].squeeze()
|
81 |
+
# all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs]
|
82 |
+
# self.all_test = [os.path.join(data_dir_root, t) for t in all_test]
|
83 |
+
import glob
|
84 |
+
# self.image_files = glob.glob(
|
85 |
+
# os.path.join(data_dir_root, 'rgb', 'rgb', '*'))
|
86 |
+
# self.depth_files = [
|
87 |
+
# r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files]
|
88 |
+
|
89 |
+
self.image_files, self.depth_files = [], []
|
90 |
+
filenames = os.listdir(os.path.join(data_dir_root, 'rgb'))
|
91 |
+
for i, filename in enumerate(filenames):
|
92 |
+
self.image_files.append(os.path.join(data_dir_root, 'rgb', filename))
|
93 |
+
base_num = int(filename.replace('.jpg', '').replace('img-', ''))
|
94 |
+
self.depth_files.append(os.path.join(data_dir_root, 'depth', str(base_num) + '.png'))
|
95 |
+
|
96 |
+
self.transform = ToTensor()
|
97 |
+
|
98 |
+
def __getitem__(self, idx):
|
99 |
+
image_path = self.image_files[idx]
|
100 |
+
depth_path = self.depth_files[idx]
|
101 |
+
|
102 |
+
image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
|
103 |
+
depth = np.asarray(Image.open(depth_path), dtype='uint16') / 10000.0
|
104 |
+
# print(depth, depth.min(), depth.max())
|
105 |
+
depth[depth > 8] = -1
|
106 |
+
depth = depth[..., None]
|
107 |
+
return self.transform(dict(image=image, depth=depth))
|
108 |
+
|
109 |
+
def __len__(self):
|
110 |
+
return len(self.image_files)
|
111 |
+
|
112 |
+
|
113 |
+
def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs):
|
114 |
+
dataset = SunRGBD(data_dir_root)
|
115 |
+
return DataLoader(dataset, batch_size, **kwargs)
|
metric_depth/zoedepth/data/transforms.py
ADDED
@@ -0,0 +1,481 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import math
|
26 |
+
import random
|
27 |
+
|
28 |
+
import cv2
|
29 |
+
import numpy as np
|
30 |
+
|
31 |
+
|
32 |
+
class RandomFliplr(object):
|
33 |
+
"""Horizontal flip of the sample with given probability.
|
34 |
+
"""
|
35 |
+
|
36 |
+
def __init__(self, probability=0.5):
|
37 |
+
"""Init.
|
38 |
+
|
39 |
+
Args:
|
40 |
+
probability (float, optional): Flip probability. Defaults to 0.5.
|
41 |
+
"""
|
42 |
+
self.__probability = probability
|
43 |
+
|
44 |
+
def __call__(self, sample):
|
45 |
+
prob = random.random()
|
46 |
+
|
47 |
+
if prob < self.__probability:
|
48 |
+
for k, v in sample.items():
|
49 |
+
if len(v.shape) >= 2:
|
50 |
+
sample[k] = np.fliplr(v).copy()
|
51 |
+
|
52 |
+
return sample
|
53 |
+
|
54 |
+
|
55 |
+
def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
|
56 |
+
"""Rezise the sample to ensure the given size. Keeps aspect ratio.
|
57 |
+
|
58 |
+
Args:
|
59 |
+
sample (dict): sample
|
60 |
+
size (tuple): image size
|
61 |
+
|
62 |
+
Returns:
|
63 |
+
tuple: new size
|
64 |
+
"""
|
65 |
+
shape = list(sample["disparity"].shape)
|
66 |
+
|
67 |
+
if shape[0] >= size[0] and shape[1] >= size[1]:
|
68 |
+
return sample
|
69 |
+
|
70 |
+
scale = [0, 0]
|
71 |
+
scale[0] = size[0] / shape[0]
|
72 |
+
scale[1] = size[1] / shape[1]
|
73 |
+
|
74 |
+
scale = max(scale)
|
75 |
+
|
76 |
+
shape[0] = math.ceil(scale * shape[0])
|
77 |
+
shape[1] = math.ceil(scale * shape[1])
|
78 |
+
|
79 |
+
# resize
|
80 |
+
sample["image"] = cv2.resize(
|
81 |
+
sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
|
82 |
+
)
|
83 |
+
|
84 |
+
sample["disparity"] = cv2.resize(
|
85 |
+
sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
|
86 |
+
)
|
87 |
+
sample["mask"] = cv2.resize(
|
88 |
+
sample["mask"].astype(np.float32),
|
89 |
+
tuple(shape[::-1]),
|
90 |
+
interpolation=cv2.INTER_NEAREST,
|
91 |
+
)
|
92 |
+
sample["mask"] = sample["mask"].astype(bool)
|
93 |
+
|
94 |
+
return tuple(shape)
|
95 |
+
|
96 |
+
|
97 |
+
class RandomCrop(object):
|
98 |
+
"""Get a random crop of the sample with the given size (width, height).
|
99 |
+
"""
|
100 |
+
|
101 |
+
def __init__(
|
102 |
+
self,
|
103 |
+
width,
|
104 |
+
height,
|
105 |
+
resize_if_needed=False,
|
106 |
+
image_interpolation_method=cv2.INTER_AREA,
|
107 |
+
):
|
108 |
+
"""Init.
|
109 |
+
|
110 |
+
Args:
|
111 |
+
width (int): output width
|
112 |
+
height (int): output height
|
113 |
+
resize_if_needed (bool, optional): If True, sample might be upsampled to ensure
|
114 |
+
that a crop of size (width, height) is possible. Defaults to False.
|
115 |
+
"""
|
116 |
+
self.__size = (height, width)
|
117 |
+
self.__resize_if_needed = resize_if_needed
|
118 |
+
self.__image_interpolation_method = image_interpolation_method
|
119 |
+
|
120 |
+
def __call__(self, sample):
|
121 |
+
|
122 |
+
shape = sample["disparity"].shape
|
123 |
+
|
124 |
+
if self.__size[0] > shape[0] or self.__size[1] > shape[1]:
|
125 |
+
if self.__resize_if_needed:
|
126 |
+
shape = apply_min_size(
|
127 |
+
sample, self.__size, self.__image_interpolation_method
|
128 |
+
)
|
129 |
+
else:
|
130 |
+
raise Exception(
|
131 |
+
"Output size {} bigger than input size {}.".format(
|
132 |
+
self.__size, shape
|
133 |
+
)
|
134 |
+
)
|
135 |
+
|
136 |
+
offset = (
|
137 |
+
np.random.randint(shape[0] - self.__size[0] + 1),
|
138 |
+
np.random.randint(shape[1] - self.__size[1] + 1),
|
139 |
+
)
|
140 |
+
|
141 |
+
for k, v in sample.items():
|
142 |
+
if k == "code" or k == "basis":
|
143 |
+
continue
|
144 |
+
|
145 |
+
if len(sample[k].shape) >= 2:
|
146 |
+
sample[k] = v[
|
147 |
+
offset[0]: offset[0] + self.__size[0],
|
148 |
+
offset[1]: offset[1] + self.__size[1],
|
149 |
+
]
|
150 |
+
|
151 |
+
return sample
|
152 |
+
|
153 |
+
|
154 |
+
class Resize(object):
|
155 |
+
"""Resize sample to given size (width, height).
|
156 |
+
"""
|
157 |
+
|
158 |
+
def __init__(
|
159 |
+
self,
|
160 |
+
width,
|
161 |
+
height,
|
162 |
+
resize_target=True,
|
163 |
+
keep_aspect_ratio=False,
|
164 |
+
ensure_multiple_of=1,
|
165 |
+
resize_method="lower_bound",
|
166 |
+
image_interpolation_method=cv2.INTER_AREA,
|
167 |
+
letter_box=False,
|
168 |
+
):
|
169 |
+
"""Init.
|
170 |
+
|
171 |
+
Args:
|
172 |
+
width (int): desired output width
|
173 |
+
height (int): desired output height
|
174 |
+
resize_target (bool, optional):
|
175 |
+
True: Resize the full sample (image, mask, target).
|
176 |
+
False: Resize image only.
|
177 |
+
Defaults to True.
|
178 |
+
keep_aspect_ratio (bool, optional):
|
179 |
+
True: Keep the aspect ratio of the input sample.
|
180 |
+
Output sample might not have the given width and height, and
|
181 |
+
resize behaviour depends on the parameter 'resize_method'.
|
182 |
+
Defaults to False.
|
183 |
+
ensure_multiple_of (int, optional):
|
184 |
+
Output width and height is constrained to be multiple of this parameter.
|
185 |
+
Defaults to 1.
|
186 |
+
resize_method (str, optional):
|
187 |
+
"lower_bound": Output will be at least as large as the given size.
|
188 |
+
"upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
|
189 |
+
"minimal": Scale as least as possible. (Output size might be smaller than given size.)
|
190 |
+
Defaults to "lower_bound".
|
191 |
+
"""
|
192 |
+
self.__width = width
|
193 |
+
self.__height = height
|
194 |
+
|
195 |
+
self.__resize_target = resize_target
|
196 |
+
self.__keep_aspect_ratio = keep_aspect_ratio
|
197 |
+
self.__multiple_of = ensure_multiple_of
|
198 |
+
self.__resize_method = resize_method
|
199 |
+
self.__image_interpolation_method = image_interpolation_method
|
200 |
+
self.__letter_box = letter_box
|
201 |
+
|
202 |
+
def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
|
203 |
+
y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
|
204 |
+
|
205 |
+
if max_val is not None and y > max_val:
|
206 |
+
y = (np.floor(x / self.__multiple_of)
|
207 |
+
* self.__multiple_of).astype(int)
|
208 |
+
|
209 |
+
if y < min_val:
|
210 |
+
y = (np.ceil(x / self.__multiple_of)
|
211 |
+
* self.__multiple_of).astype(int)
|
212 |
+
|
213 |
+
return y
|
214 |
+
|
215 |
+
def get_size(self, width, height):
|
216 |
+
# determine new height and width
|
217 |
+
scale_height = self.__height / height
|
218 |
+
scale_width = self.__width / width
|
219 |
+
|
220 |
+
if self.__keep_aspect_ratio:
|
221 |
+
if self.__resize_method == "lower_bound":
|
222 |
+
# scale such that output size is lower bound
|
223 |
+
if scale_width > scale_height:
|
224 |
+
# fit width
|
225 |
+
scale_height = scale_width
|
226 |
+
else:
|
227 |
+
# fit height
|
228 |
+
scale_width = scale_height
|
229 |
+
elif self.__resize_method == "upper_bound":
|
230 |
+
# scale such that output size is upper bound
|
231 |
+
if scale_width < scale_height:
|
232 |
+
# fit width
|
233 |
+
scale_height = scale_width
|
234 |
+
else:
|
235 |
+
# fit height
|
236 |
+
scale_width = scale_height
|
237 |
+
elif self.__resize_method == "minimal":
|
238 |
+
# scale as little as possible
|
239 |
+
if abs(1 - scale_width) < abs(1 - scale_height):
|
240 |
+
# fit width
|
241 |
+
scale_height = scale_width
|
242 |
+
else:
|
243 |
+
# fit height
|
244 |
+
scale_width = scale_height
|
245 |
+
else:
|
246 |
+
raise ValueError(
|
247 |
+
f"resize_method {self.__resize_method} not implemented"
|
248 |
+
)
|
249 |
+
|
250 |
+
if self.__resize_method == "lower_bound":
|
251 |
+
new_height = self.constrain_to_multiple_of(
|
252 |
+
scale_height * height, min_val=self.__height
|
253 |
+
)
|
254 |
+
new_width = self.constrain_to_multiple_of(
|
255 |
+
scale_width * width, min_val=self.__width
|
256 |
+
)
|
257 |
+
elif self.__resize_method == "upper_bound":
|
258 |
+
new_height = self.constrain_to_multiple_of(
|
259 |
+
scale_height * height, max_val=self.__height
|
260 |
+
)
|
261 |
+
new_width = self.constrain_to_multiple_of(
|
262 |
+
scale_width * width, max_val=self.__width
|
263 |
+
)
|
264 |
+
elif self.__resize_method == "minimal":
|
265 |
+
new_height = self.constrain_to_multiple_of(scale_height * height)
|
266 |
+
new_width = self.constrain_to_multiple_of(scale_width * width)
|
267 |
+
else:
|
268 |
+
raise ValueError(
|
269 |
+
f"resize_method {self.__resize_method} not implemented")
|
270 |
+
|
271 |
+
return (new_width, new_height)
|
272 |
+
|
273 |
+
def make_letter_box(self, sample):
|
274 |
+
top = bottom = (self.__height - sample.shape[0]) // 2
|
275 |
+
left = right = (self.__width - sample.shape[1]) // 2
|
276 |
+
sample = cv2.copyMakeBorder(
|
277 |
+
sample, top, bottom, left, right, cv2.BORDER_CONSTANT, None, 0)
|
278 |
+
return sample
|
279 |
+
|
280 |
+
def __call__(self, sample):
|
281 |
+
width, height = self.get_size(
|
282 |
+
sample["image"].shape[1], sample["image"].shape[0]
|
283 |
+
)
|
284 |
+
|
285 |
+
# resize sample
|
286 |
+
sample["image"] = cv2.resize(
|
287 |
+
sample["image"],
|
288 |
+
(width, height),
|
289 |
+
interpolation=self.__image_interpolation_method,
|
290 |
+
)
|
291 |
+
|
292 |
+
if self.__letter_box:
|
293 |
+
sample["image"] = self.make_letter_box(sample["image"])
|
294 |
+
|
295 |
+
if self.__resize_target:
|
296 |
+
if "disparity" in sample:
|
297 |
+
sample["disparity"] = cv2.resize(
|
298 |
+
sample["disparity"],
|
299 |
+
(width, height),
|
300 |
+
interpolation=cv2.INTER_NEAREST,
|
301 |
+
)
|
302 |
+
|
303 |
+
if self.__letter_box:
|
304 |
+
sample["disparity"] = self.make_letter_box(
|
305 |
+
sample["disparity"])
|
306 |
+
|
307 |
+
if "depth" in sample:
|
308 |
+
sample["depth"] = cv2.resize(
|
309 |
+
sample["depth"], (width,
|
310 |
+
height), interpolation=cv2.INTER_NEAREST
|
311 |
+
)
|
312 |
+
|
313 |
+
if self.__letter_box:
|
314 |
+
sample["depth"] = self.make_letter_box(sample["depth"])
|
315 |
+
|
316 |
+
sample["mask"] = cv2.resize(
|
317 |
+
sample["mask"].astype(np.float32),
|
318 |
+
(width, height),
|
319 |
+
interpolation=cv2.INTER_NEAREST,
|
320 |
+
)
|
321 |
+
|
322 |
+
if self.__letter_box:
|
323 |
+
sample["mask"] = self.make_letter_box(sample["mask"])
|
324 |
+
|
325 |
+
sample["mask"] = sample["mask"].astype(bool)
|
326 |
+
|
327 |
+
return sample
|
328 |
+
|
329 |
+
|
330 |
+
class ResizeFixed(object):
|
331 |
+
def __init__(self, size):
|
332 |
+
self.__size = size
|
333 |
+
|
334 |
+
def __call__(self, sample):
|
335 |
+
sample["image"] = cv2.resize(
|
336 |
+
sample["image"], self.__size[::-1], interpolation=cv2.INTER_LINEAR
|
337 |
+
)
|
338 |
+
|
339 |
+
sample["disparity"] = cv2.resize(
|
340 |
+
sample["disparity"], self.__size[::-
|
341 |
+
1], interpolation=cv2.INTER_NEAREST
|
342 |
+
)
|
343 |
+
|
344 |
+
sample["mask"] = cv2.resize(
|
345 |
+
sample["mask"].astype(np.float32),
|
346 |
+
self.__size[::-1],
|
347 |
+
interpolation=cv2.INTER_NEAREST,
|
348 |
+
)
|
349 |
+
sample["mask"] = sample["mask"].astype(bool)
|
350 |
+
|
351 |
+
return sample
|
352 |
+
|
353 |
+
|
354 |
+
class Rescale(object):
|
355 |
+
"""Rescale target values to the interval [0, max_val].
|
356 |
+
If input is constant, values are set to max_val / 2.
|
357 |
+
"""
|
358 |
+
|
359 |
+
def __init__(self, max_val=1.0, use_mask=True):
|
360 |
+
"""Init.
|
361 |
+
|
362 |
+
Args:
|
363 |
+
max_val (float, optional): Max output value. Defaults to 1.0.
|
364 |
+
use_mask (bool, optional): Only operate on valid pixels (mask == True). Defaults to True.
|
365 |
+
"""
|
366 |
+
self.__max_val = max_val
|
367 |
+
self.__use_mask = use_mask
|
368 |
+
|
369 |
+
def __call__(self, sample):
|
370 |
+
disp = sample["disparity"]
|
371 |
+
|
372 |
+
if self.__use_mask:
|
373 |
+
mask = sample["mask"]
|
374 |
+
else:
|
375 |
+
mask = np.ones_like(disp, dtype=bool)  # np.bool was removed in newer NumPy; use the builtin bool
|
376 |
+
|
377 |
+
if np.sum(mask) == 0:
|
378 |
+
return sample
|
379 |
+
|
380 |
+
min_val = np.min(disp[mask])
|
381 |
+
max_val = np.max(disp[mask])
|
382 |
+
|
383 |
+
if max_val > min_val:
|
384 |
+
sample["disparity"][mask] = (
|
385 |
+
(disp[mask] - min_val) / (max_val - min_val) * self.__max_val
|
386 |
+
)
|
387 |
+
else:
|
388 |
+
sample["disparity"][mask] = np.ones_like(
|
389 |
+
disp[mask]) * self.__max_val / 2.0
|
390 |
+
|
391 |
+
return sample
|
392 |
+
|
393 |
+
|
394 |
+
# mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
|
395 |
+
class NormalizeImage(object):
|
396 |
+
"""Normlize image by given mean and std.
|
397 |
+
"""
|
398 |
+
|
399 |
+
def __init__(self, mean, std):
|
400 |
+
self.__mean = mean
|
401 |
+
self.__std = std
|
402 |
+
|
403 |
+
def __call__(self, sample):
|
404 |
+
sample["image"] = (sample["image"] - self.__mean) / self.__std
|
405 |
+
|
406 |
+
return sample
|
407 |
+
|
408 |
+
|
409 |
+
class DepthToDisparity(object):
|
410 |
+
"""Convert depth to disparity. Removes depth from sample.
|
411 |
+
"""
|
412 |
+
|
413 |
+
def __init__(self, eps=1e-4):
|
414 |
+
self.__eps = eps
|
415 |
+
|
416 |
+
def __call__(self, sample):
|
417 |
+
assert "depth" in sample
|
418 |
+
|
419 |
+
sample["mask"][sample["depth"] < self.__eps] = False
|
420 |
+
|
421 |
+
sample["disparity"] = np.zeros_like(sample["depth"])
|
422 |
+
sample["disparity"][sample["depth"] >= self.__eps] = (
|
423 |
+
1.0 / sample["depth"][sample["depth"] >= self.__eps]
|
424 |
+
)
|
425 |
+
|
426 |
+
del sample["depth"]
|
427 |
+
|
428 |
+
return sample
|
429 |
+
|
430 |
+
|
431 |
+
class DisparityToDepth(object):
|
432 |
+
"""Convert disparity to depth. Removes disparity from sample.
|
433 |
+
"""
|
434 |
+
|
435 |
+
def __init__(self, eps=1e-4):
|
436 |
+
self.__eps = eps
|
437 |
+
|
438 |
+
def __call__(self, sample):
|
439 |
+
assert "disparity" in sample
|
440 |
+
|
441 |
+
disp = np.abs(sample["disparity"])
|
442 |
+
sample["mask"][disp < self.__eps] = False
|
443 |
+
|
444 |
+
# print(sample["disparity"])
|
445 |
+
# print(sample["mask"].sum())
|
446 |
+
# exit()
|
447 |
+
|
448 |
+
sample["depth"] = np.zeros_like(disp)
|
449 |
+
sample["depth"][disp >= self.__eps] = (
|
450 |
+
1.0 / disp[disp >= self.__eps]
|
451 |
+
)
|
452 |
+
|
453 |
+
del sample["disparity"]
|
454 |
+
|
455 |
+
return sample
|
456 |
+
|
457 |
+
|
458 |
+
class PrepareForNet(object):
|
459 |
+
"""Prepare sample for usage as network input.
|
460 |
+
"""
|
461 |
+
|
462 |
+
def __init__(self):
|
463 |
+
pass
|
464 |
+
|
465 |
+
def __call__(self, sample):
|
466 |
+
image = np.transpose(sample["image"], (2, 0, 1))
|
467 |
+
sample["image"] = np.ascontiguousarray(image).astype(np.float32)
|
468 |
+
|
469 |
+
if "mask" in sample:
|
470 |
+
sample["mask"] = sample["mask"].astype(np.float32)
|
471 |
+
sample["mask"] = np.ascontiguousarray(sample["mask"])
|
472 |
+
|
473 |
+
if "disparity" in sample:
|
474 |
+
disparity = sample["disparity"].astype(np.float32)
|
475 |
+
sample["disparity"] = np.ascontiguousarray(disparity)
|
476 |
+
|
477 |
+
if "depth" in sample:
|
478 |
+
depth = sample["depth"].astype(np.float32)
|
479 |
+
sample["depth"] = np.ascontiguousarray(depth)
|
480 |
+
|
481 |
+
return sample
|
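All transforms in this file operate on a plain dict sample, so they compose directly with torchvision's Compose. The sketch below runs on a synthetic sample; it assumes metric_depth/ is on PYTHONPATH, and the target resolution and normalization constants are illustrative rather than taken from a training config.
import numpy as np
from torchvision.transforms import Compose
from zoedepth.data.transforms import (
    DepthToDisparity, NormalizeImage, PrepareForNet, RandomFliplr, Resize,
)
# Minimal sample: the transforms expect "image", "depth"/"disparity" and "mask" keys.
sample = {
    "image": np.random.rand(480, 640, 3).astype(np.float32),
    "depth": np.random.uniform(0.5, 10.0, (480, 640)).astype(np.float32),
    "mask": np.ones((480, 640), dtype=bool),
}
pipeline = Compose([
    RandomFliplr(0.5),                 # random horizontal flip of all 2-D entries
    DepthToDisparity(),                # replaces "depth" with "disparity", updates "mask"
    Resize(384, 384, keep_aspect_ratio=True, ensure_multiple_of=32,
           resize_method="lower_bound"),
    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    PrepareForNet(),                   # CHW float32 image, contiguous arrays
])
out = pipeline(sample)
print(out["image"].shape, out["disparity"].shape, out["mask"].shape)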
metric_depth/zoedepth/data/vkitti.py
ADDED
@@ -0,0 +1,151 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import torch
|
26 |
+
from torch.utils.data import Dataset, DataLoader
|
27 |
+
from torchvision import transforms
|
28 |
+
import os
|
29 |
+
|
30 |
+
from PIL import Image
|
31 |
+
import numpy as np
|
32 |
+
import cv2
|
33 |
+
|
34 |
+
|
35 |
+
class ToTensor(object):
|
36 |
+
def __init__(self):
|
37 |
+
self.normalize = transforms.Normalize(
|
38 |
+
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
39 |
+
# self.resize = transforms.Resize((375, 1242))
|
40 |
+
|
41 |
+
def __call__(self, sample):
|
42 |
+
image, depth = sample['image'], sample['depth']
|
43 |
+
|
44 |
+
image = self.to_tensor(image)
|
45 |
+
image = self.normalize(image)
|
46 |
+
depth = self.to_tensor(depth)
|
47 |
+
|
48 |
+
# image = self.resize(image)
|
49 |
+
|
50 |
+
return {'image': image, 'depth': depth, 'dataset': "vkitti"}
|
51 |
+
|
52 |
+
def to_tensor(self, pic):
|
53 |
+
|
54 |
+
if isinstance(pic, np.ndarray):
|
55 |
+
img = torch.from_numpy(pic.transpose((2, 0, 1)))
|
56 |
+
return img
|
57 |
+
|
58 |
+
# # handle PIL Image
|
59 |
+
if pic.mode == 'I':
|
60 |
+
img = torch.from_numpy(np.array(pic, np.int32, copy=False))
|
61 |
+
elif pic.mode == 'I;16':
|
62 |
+
img = torch.from_numpy(np.array(pic, np.int16, copy=False))
|
63 |
+
else:
|
64 |
+
img = torch.ByteTensor(
|
65 |
+
torch.ByteStorage.from_buffer(pic.tobytes()))
|
66 |
+
# PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
|
67 |
+
if pic.mode == 'YCbCr':
|
68 |
+
nchannel = 3
|
69 |
+
elif pic.mode == 'I;16':
|
70 |
+
nchannel = 1
|
71 |
+
else:
|
72 |
+
nchannel = len(pic.mode)
|
73 |
+
img = img.view(pic.size[1], pic.size[0], nchannel)
|
74 |
+
|
75 |
+
img = img.transpose(0, 1).transpose(0, 2).contiguous()
|
76 |
+
if isinstance(img, torch.ByteTensor):
|
77 |
+
return img.float()
|
78 |
+
else:
|
79 |
+
return img
|
80 |
+
|
81 |
+
|
82 |
+
class VKITTI(Dataset):
|
83 |
+
def __init__(self, data_dir_root, do_kb_crop=True):
|
84 |
+
import glob
|
85 |
+
# image paths are of the form <data_dir_root>/test_color/*.png (depth maps under <data_dir_root>/test_depth/)
|
86 |
+
self.image_files = glob.glob(os.path.join(
|
87 |
+
data_dir_root, "test_color", '*.png'))
|
88 |
+
self.depth_files = [r.replace("test_color", "test_depth")
|
89 |
+
for r in self.image_files]
|
90 |
+
self.do_kb_crop = do_kb_crop  # honour the constructor argument instead of hard-coding True
|
91 |
+
self.transform = ToTensor()
|
92 |
+
|
93 |
+
def __getitem__(self, idx):
|
94 |
+
image_path = self.image_files[idx]
|
95 |
+
depth_path = self.depth_files[idx]
|
96 |
+
|
97 |
+
image = Image.open(image_path)
|
98 |
+
depth = Image.open(depth_path)
|
99 |
+
depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
|
100 |
+
cv2.IMREAD_ANYDEPTH)
|
101 |
+
print("dpeth min max", depth.min(), depth.max())
|
102 |
+
|
103 |
+
# print(np.shape(image))
|
104 |
+
# print(np.shape(depth))
|
105 |
+
|
106 |
+
# depth[depth > 8] = -1
|
107 |
+
|
108 |
+
if self.do_kb_crop and False:
|
109 |
+
height = image.height
|
110 |
+
width = image.width
|
111 |
+
top_margin = int(height - 352)
|
112 |
+
left_margin = int((width - 1216) / 2)
|
113 |
+
depth = depth.crop(
|
114 |
+
(left_margin, top_margin, left_margin + 1216, top_margin + 352))
|
115 |
+
image = image.crop(
|
116 |
+
(left_margin, top_margin, left_margin + 1216, top_margin + 352))
|
117 |
+
# uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216]
|
118 |
+
|
119 |
+
image = np.asarray(image, dtype=np.float32) / 255.0
|
120 |
+
# depth = np.asarray(depth, dtype=np.uint16) /1.
|
121 |
+
depth = depth[..., None]
|
122 |
+
sample = dict(image=image, depth=depth)
|
123 |
+
|
124 |
+
# return sample
|
125 |
+
sample = self.transform(sample)
|
126 |
+
|
127 |
+
if idx == 0:
|
128 |
+
print(sample["image"].shape)
|
129 |
+
|
130 |
+
return sample
|
131 |
+
|
132 |
+
def __len__(self):
|
133 |
+
return len(self.image_files)
|
134 |
+
|
135 |
+
|
136 |
+
def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs):
|
137 |
+
dataset = VKITTI(data_dir_root)
|
138 |
+
return DataLoader(dataset, batch_size, **kwargs)
|
139 |
+
|
140 |
+
|
141 |
+
if __name__ == "__main__":
|
142 |
+
loader = get_vkitti_loader(
|
143 |
+
data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test")
|
144 |
+
print("Total files", len(loader.dataset))
|
145 |
+
for i, sample in enumerate(loader):
|
146 |
+
print(sample["image"].shape)
|
147 |
+
print(sample["depth"].shape)
|
148 |
+
print(sample["dataset"])
|
149 |
+
print(sample['depth'].min(), sample['depth'].max())
|
150 |
+
if i > 5:
|
151 |
+
break
|
metric_depth/zoedepth/data/vkitti2.py
ADDED
@@ -0,0 +1,187 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import os
|
26 |
+
|
27 |
+
import cv2
|
28 |
+
import numpy as np
|
29 |
+
import torch
|
30 |
+
from PIL import Image
|
31 |
+
from torch.utils.data import DataLoader, Dataset
|
32 |
+
from torchvision import transforms
|
33 |
+
|
34 |
+
|
35 |
+
class ToTensor(object):
|
36 |
+
def __init__(self):
|
37 |
+
# self.normalize = transforms.Normalize(
|
38 |
+
# mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
39 |
+
self.normalize = lambda x: x
|
40 |
+
# self.resize = transforms.Resize((375, 1242))
|
41 |
+
|
42 |
+
def __call__(self, sample):
|
43 |
+
image, depth = sample['image'], sample['depth']
|
44 |
+
|
45 |
+
image = self.to_tensor(image)
|
46 |
+
image = self.normalize(image)
|
47 |
+
depth = self.to_tensor(depth)
|
48 |
+
|
49 |
+
# image = self.resize(image)
|
50 |
+
|
51 |
+
return {'image': image, 'depth': depth, 'dataset': "vkitti"}
|
52 |
+
|
53 |
+
def to_tensor(self, pic):
|
54 |
+
|
55 |
+
if isinstance(pic, np.ndarray):
|
56 |
+
img = torch.from_numpy(pic.transpose((2, 0, 1)))
|
57 |
+
return img
|
58 |
+
|
59 |
+
# # handle PIL Image
|
60 |
+
if pic.mode == 'I':
|
61 |
+
img = torch.from_numpy(np.array(pic, np.int32, copy=False))
|
62 |
+
elif pic.mode == 'I;16':
|
63 |
+
img = torch.from_numpy(np.array(pic, np.int16, copy=False))
|
64 |
+
else:
|
65 |
+
img = torch.ByteTensor(
|
66 |
+
torch.ByteStorage.from_buffer(pic.tobytes()))
|
67 |
+
# PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
|
68 |
+
if pic.mode == 'YCbCr':
|
69 |
+
nchannel = 3
|
70 |
+
elif pic.mode == 'I;16':
|
71 |
+
nchannel = 1
|
72 |
+
else:
|
73 |
+
nchannel = len(pic.mode)
|
74 |
+
img = img.view(pic.size[1], pic.size[0], nchannel)
|
75 |
+
|
76 |
+
img = img.transpose(0, 1).transpose(0, 2).contiguous()
|
77 |
+
if isinstance(img, torch.ByteTensor):
|
78 |
+
return img.float()
|
79 |
+
else:
|
80 |
+
return img
|
81 |
+
|
82 |
+
|
83 |
+
class VKITTI2(Dataset):
|
84 |
+
def __init__(self, data_dir_root, do_kb_crop=True, split="test"):
|
85 |
+
import glob
|
86 |
+
|
87 |
+
# image paths are of the form <data_dir_root>/rgb/<scene>/<variant>/frames/<rgb,depth>/Camera<0,1>/rgb_{}.jpg
|
88 |
+
self.image_files = glob.glob(os.path.join(
|
89 |
+
data_dir_root, "**", "frames", "rgb", "Camera_0", '*.jpg'), recursive=True)
|
90 |
+
self.depth_files = [r.replace("/rgb/", "/depth/").replace(
|
91 |
+
"rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]
|
92 |
+
self.do_kb_crop = do_kb_crop  # honour the constructor argument instead of hard-coding True
|
93 |
+
self.transform = ToTensor()
|
94 |
+
|
95 |
+
# If train test split is not created, then create one.
|
96 |
+
# Split is such that 8% of the frames from each scene are used for testing.
|
97 |
+
if not os.path.exists(os.path.join(data_dir_root, "train.txt")):
|
98 |
+
import random
|
99 |
+
scenes = set([os.path.basename(os.path.dirname(
|
100 |
+
os.path.dirname(os.path.dirname(f)))) for f in self.image_files])
|
101 |
+
train_files = []
|
102 |
+
test_files = []
|
103 |
+
for scene in scenes:
|
104 |
+
scene_files = [f for f in self.image_files if os.path.basename(
|
105 |
+
os.path.dirname(os.path.dirname(os.path.dirname(f)))) == scene]
|
106 |
+
random.shuffle(scene_files)
|
107 |
+
train_files.extend(scene_files[:int(len(scene_files) * 0.92)])
|
108 |
+
test_files.extend(scene_files[int(len(scene_files) * 0.92):])
|
109 |
+
with open(os.path.join(data_dir_root, "train.txt"), "w") as f:
|
110 |
+
f.write("\n".join(train_files))
|
111 |
+
with open(os.path.join(data_dir_root, "test.txt"), "w") as f:
|
112 |
+
f.write("\n".join(test_files))
|
113 |
+
|
114 |
+
if split == "train":
|
115 |
+
with open(os.path.join(data_dir_root, "train.txt"), "r") as f:
|
116 |
+
self.image_files = f.read().splitlines()
|
117 |
+
self.depth_files = [r.replace("/rgb/", "/depth/").replace(
|
118 |
+
"rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]
|
119 |
+
elif split == "test":
|
120 |
+
with open(os.path.join(data_dir_root, "test.txt"), "r") as f:
|
121 |
+
self.image_files = f.read().splitlines()
|
122 |
+
self.depth_files = [r.replace("/rgb/", "/depth/").replace(
|
123 |
+
"rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files]
|
124 |
+
|
125 |
+
def __getitem__(self, idx):
|
126 |
+
image_path = self.image_files[idx]
|
127 |
+
depth_path = self.depth_files[idx]
|
128 |
+
|
129 |
+
image = Image.open(image_path)
|
130 |
+
# depth = Image.open(depth_path)
|
131 |
+
depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
|
132 |
+
cv2.IMREAD_ANYDEPTH) / 100.0 # cm to m
|
133 |
+
depth = Image.fromarray(depth)
|
134 |
+
# print("dpeth min max", depth.min(), depth.max())
|
135 |
+
|
136 |
+
# print(np.shape(image))
|
137 |
+
# print(np.shape(depth))
|
138 |
+
|
139 |
+
if self.do_kb_crop:
|
140 |
+
if idx == 0:
|
141 |
+
print("Using KB input crop")
|
142 |
+
height = image.height
|
143 |
+
width = image.width
|
144 |
+
top_margin = int(height - 352)
|
145 |
+
left_margin = int((width - 1216) / 2)
|
146 |
+
depth = depth.crop(
|
147 |
+
(left_margin, top_margin, left_margin + 1216, top_margin + 352))
|
148 |
+
image = image.crop(
|
149 |
+
(left_margin, top_margin, left_margin + 1216, top_margin + 352))
|
150 |
+
# uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216]
|
151 |
+
|
152 |
+
image = np.asarray(image, dtype=np.float32) / 255.0
|
153 |
+
# depth = np.asarray(depth, dtype=np.uint16) /1.
|
154 |
+
depth = np.asarray(depth, dtype=np.float32) / 1.
|
155 |
+
depth[depth > 80] = -1
|
156 |
+
|
157 |
+
depth = depth[..., None]
|
158 |
+
sample = dict(image=image, depth=depth)
|
159 |
+
|
160 |
+
# return sample
|
161 |
+
sample = self.transform(sample)
|
162 |
+
|
163 |
+
if idx == 0:
|
164 |
+
print(sample["image"].shape)
|
165 |
+
|
166 |
+
return sample
|
167 |
+
|
168 |
+
def __len__(self):
|
169 |
+
return len(self.image_files)
|
170 |
+
|
171 |
+
|
172 |
+
def get_vkitti2_loader(data_dir_root, batch_size=1, **kwargs):
|
173 |
+
dataset = VKITTI2(data_dir_root)
|
174 |
+
return DataLoader(dataset, batch_size, **kwargs)
|
175 |
+
|
176 |
+
|
177 |
+
if __name__ == "__main__":
|
178 |
+
loader = get_vkitti2_loader(
|
179 |
+
data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti2")
|
180 |
+
print("Total files", len(loader.dataset))
|
181 |
+
for i, sample in enumerate(loader):
|
182 |
+
print(sample["image"].shape)
|
183 |
+
print(sample["depth"].shape)
|
184 |
+
print(sample["dataset"])
|
185 |
+
print(sample['depth'].min(), sample['depth'].max())
|
186 |
+
if i > 5:
|
187 |
+
break
|
metric_depth/zoedepth/models/__init__.py
ADDED
@@ -0,0 +1,24 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
metric_depth/zoedepth/models/base_models/__init__.py
ADDED
@@ -0,0 +1,24 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
metric_depth/zoedepth/models/base_models/depth_anything.py
ADDED
@@ -0,0 +1,376 @@
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import torch
|
26 |
+
import torch.nn as nn
|
27 |
+
import numpy as np
|
28 |
+
from torchvision.transforms import Normalize
|
29 |
+
from zoedepth.models.base_models.dpt_dinov2.dpt import DPT_DINOv2
|
30 |
+
|
31 |
+
|
32 |
+
def denormalize(x):
|
33 |
+
"""Reverses the imagenet normalization applied to the input.
|
34 |
+
|
35 |
+
Args:
|
36 |
+
x (torch.Tensor - shape(N,3,H,W)): input tensor
|
37 |
+
|
38 |
+
Returns:
|
39 |
+
torch.Tensor - shape(N,3,H,W): Denormalized input
|
40 |
+
"""
|
41 |
+
mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device)
|
42 |
+
std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device)
|
43 |
+
return x * std + mean
|
44 |
+
|
45 |
+
def get_activation(name, bank):
|
46 |
+
def hook(model, input, output):
|
47 |
+
bank[name] = output
|
48 |
+
return hook
|
49 |
+
|
50 |
+
|
51 |
+
class Resize(object):
|
52 |
+
"""Resize sample to given size (width, height).
|
53 |
+
"""
|
54 |
+
|
55 |
+
def __init__(
|
56 |
+
self,
|
57 |
+
width,
|
58 |
+
height,
|
59 |
+
resize_target=True,
|
60 |
+
keep_aspect_ratio=False,
|
61 |
+
ensure_multiple_of=1,
|
62 |
+
resize_method="lower_bound",
|
63 |
+
):
|
64 |
+
"""Init.
|
65 |
+
Args:
|
66 |
+
width (int): desired output width
|
67 |
+
height (int): desired output height
|
68 |
+
resize_target (bool, optional):
|
69 |
+
True: Resize the full sample (image, mask, target).
|
70 |
+
False: Resize image only.
|
71 |
+
Defaults to True.
|
72 |
+
keep_aspect_ratio (bool, optional):
|
73 |
+
True: Keep the aspect ratio of the input sample.
|
74 |
+
Output sample might not have the given width and height, and
|
75 |
+
resize behaviour depends on the parameter 'resize_method'.
|
76 |
+
Defaults to False.
|
77 |
+
ensure_multiple_of (int, optional):
|
78 |
+
Output width and height is constrained to be multiple of this parameter.
|
79 |
+
Defaults to 1.
|
80 |
+
resize_method (str, optional):
|
81 |
+
"lower_bound": Output will be at least as large as the given size.
|
82 |
+
"upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
|
83 |
+
"minimal": Scale as least as possible. (Output size might be smaller than given size.)
|
84 |
+
Defaults to "lower_bound".
|
85 |
+
"""
|
86 |
+
print("Params passed to Resize transform:")
|
87 |
+
print("\twidth: ", width)
|
88 |
+
print("\theight: ", height)
|
89 |
+
print("\tresize_target: ", resize_target)
|
90 |
+
print("\tkeep_aspect_ratio: ", keep_aspect_ratio)
|
91 |
+
print("\tensure_multiple_of: ", ensure_multiple_of)
|
92 |
+
print("\tresize_method: ", resize_method)
|
93 |
+
|
94 |
+
self.__width = width
|
95 |
+
self.__height = height
|
96 |
+
|
97 |
+
self.__keep_aspect_ratio = keep_aspect_ratio
|
98 |
+
self.__multiple_of = ensure_multiple_of
|
99 |
+
self.__resize_method = resize_method
|
100 |
+
|
101 |
+
def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
|
102 |
+
y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
|
103 |
+
|
104 |
+
if max_val is not None and y > max_val:
|
105 |
+
y = (np.floor(x / self.__multiple_of)
|
106 |
+
* self.__multiple_of).astype(int)
|
107 |
+
|
108 |
+
if y < min_val:
|
109 |
+
y = (np.ceil(x / self.__multiple_of)
|
110 |
+
* self.__multiple_of).astype(int)
|
111 |
+
|
112 |
+
return y
|
113 |
+
|
114 |
+
def get_size(self, width, height):
|
115 |
+
# determine new height and width
|
116 |
+
scale_height = self.__height / height
|
117 |
+
scale_width = self.__width / width
|
118 |
+
|
119 |
+
if self.__keep_aspect_ratio:
|
120 |
+
if self.__resize_method == "lower_bound":
|
121 |
+
# scale such that output size is lower bound
|
122 |
+
if scale_width > scale_height:
|
123 |
+
# fit width
|
124 |
+
scale_height = scale_width
|
125 |
+
else:
|
126 |
+
# fit height
|
127 |
+
scale_width = scale_height
|
128 |
+
elif self.__resize_method == "upper_bound":
|
129 |
+
# scale such that output size is upper bound
|
130 |
+
if scale_width < scale_height:
|
131 |
+
# fit width
|
132 |
+
scale_height = scale_width
|
133 |
+
else:
|
134 |
+
# fit height
|
135 |
+
scale_width = scale_height
|
136 |
+
elif self.__resize_method == "minimal":
|
137 |
+
# scale as little as possible
|
138 |
+
if abs(1 - scale_width) < abs(1 - scale_height):
|
139 |
+
# fit width
|
140 |
+
scale_height = scale_width
|
141 |
+
else:
|
142 |
+
# fit height
|
143 |
+
scale_width = scale_height
|
144 |
+
else:
|
145 |
+
raise ValueError(
|
146 |
+
f"resize_method {self.__resize_method} not implemented"
|
147 |
+
)
|
148 |
+
|
149 |
+
if self.__resize_method == "lower_bound":
|
150 |
+
new_height = self.constrain_to_multiple_of(
|
151 |
+
scale_height * height, min_val=self.__height
|
152 |
+
)
|
153 |
+
new_width = self.constrain_to_multiple_of(
|
154 |
+
scale_width * width, min_val=self.__width
|
155 |
+
)
|
156 |
+
elif self.__resize_method == "upper_bound":
|
157 |
+
new_height = self.constrain_to_multiple_of(
|
158 |
+
scale_height * height, max_val=self.__height
|
159 |
+
)
|
160 |
+
new_width = self.constrain_to_multiple_of(
|
161 |
+
scale_width * width, max_val=self.__width
|
162 |
+
)
|
163 |
+
elif self.__resize_method == "minimal":
|
164 |
+
new_height = self.constrain_to_multiple_of(scale_height * height)
|
165 |
+
new_width = self.constrain_to_multiple_of(scale_width * width)
|
166 |
+
else:
|
167 |
+
raise ValueError(
|
168 |
+
f"resize_method {self.__resize_method} not implemented")
|
169 |
+
|
170 |
+
return (new_width, new_height)
|
171 |
+
|
172 |
+
def __call__(self, x):
|
173 |
+
width, height = self.get_size(*x.shape[-2:][::-1])
|
174 |
+
return nn.functional.interpolate(x, (height, width), mode='bilinear', align_corners=True)
|
175 |
+
|
176 |
+
class PrepForMidas(object):
|
177 |
+
def __init__(self, resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True):
|
178 |
+
if isinstance(img_size, int):
|
179 |
+
img_size = (img_size, img_size)
|
180 |
+
net_h, net_w = img_size
|
181 |
+
# self.normalization = Normalize(
|
182 |
+
# mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
|
183 |
+
self.normalization = Normalize(
|
184 |
+
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
185 |
+
self.resizer = Resize(net_w, net_h, keep_aspect_ratio=keep_aspect_ratio, ensure_multiple_of=14, resize_method=resize_mode) \
|
186 |
+
if do_resize else nn.Identity()
|
187 |
+
|
188 |
+
def __call__(self, x):
|
189 |
+
return self.normalization(self.resizer(x))
|
190 |
+
|
191 |
+
|
192 |
+
class DepthAnythingCore(nn.Module):
|
193 |
+
def __init__(self, midas, trainable=False, fetch_features=True, layer_names=('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'), freeze_bn=False, keep_aspect_ratio=True,
|
194 |
+
img_size=384, **kwargs):
|
195 |
+
"""Midas Base model used for multi-scale feature extraction.
|
196 |
+
|
197 |
+
Args:
|
198 |
+
midas (torch.nn.Module): Midas model.
|
199 |
+
trainable (bool, optional): Train midas model. Defaults to False.
|
200 |
+
fetch_features (bool, optional): Extract multi-scale features. Defaults to True.
|
201 |
+
layer_names (tuple, optional): Layers used for feature extraction. Order = (head output features, last layer features, ...decoder features). Defaults to ('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1').
|
202 |
+
freeze_bn (bool, optional): Freeze BatchNorm. Generally results in better finetuning performance. Defaults to False.
|
203 |
+
keep_aspect_ratio (bool, optional): Keep the aspect ratio of input images while resizing. Defaults to True.
|
204 |
+
img_size (int, tuple, optional): Input resolution. Defaults to 384.
|
205 |
+
"""
|
206 |
+
super().__init__()
|
207 |
+
self.core = midas
|
208 |
+
self.output_channels = None
|
209 |
+
self.core_out = {}
|
210 |
+
self.trainable = trainable
|
211 |
+
self.fetch_features = fetch_features
|
212 |
+
# midas.scratch.output_conv = nn.Identity()
|
213 |
+
self.handles = []
|
214 |
+
# self.layer_names = ['out_conv','l4_rn', 'r4', 'r3', 'r2', 'r1']
|
215 |
+
self.layer_names = layer_names
|
216 |
+
|
217 |
+
self.set_trainable(trainable)
|
218 |
+
self.set_fetch_features(fetch_features)
|
219 |
+
|
220 |
+
self.prep = PrepForMidas(keep_aspect_ratio=keep_aspect_ratio,
|
221 |
+
img_size=img_size, do_resize=kwargs.get('do_resize', True))
|
222 |
+
|
223 |
+
if freeze_bn:
|
224 |
+
self.freeze_bn()
|
225 |
+
|
226 |
+
def set_trainable(self, trainable):
|
227 |
+
self.trainable = trainable
|
228 |
+
if trainable:
|
229 |
+
self.unfreeze()
|
230 |
+
else:
|
231 |
+
self.freeze()
|
232 |
+
return self
|
233 |
+
|
234 |
+
def set_fetch_features(self, fetch_features):
|
235 |
+
self.fetch_features = fetch_features
|
236 |
+
if fetch_features:
|
237 |
+
if len(self.handles) == 0:
|
238 |
+
self.attach_hooks(self.core)
|
239 |
+
else:
|
240 |
+
self.remove_hooks()
|
241 |
+
return self
|
242 |
+
|
243 |
+
def freeze(self):
|
244 |
+
for p in self.parameters():
|
245 |
+
p.requires_grad = False
|
246 |
+
self.trainable = False
|
247 |
+
return self
|
248 |
+
|
249 |
+
def unfreeze(self):
|
250 |
+
for p in self.parameters():
|
251 |
+
p.requires_grad = True
|
252 |
+
self.trainable = True
|
253 |
+
return self
|
254 |
+
|
255 |
+
def freeze_bn(self):
|
256 |
+
for m in self.modules():
|
257 |
+
if isinstance(m, nn.BatchNorm2d):
|
258 |
+
m.eval()
|
259 |
+
return self
|
260 |
+
|
261 |
+
def forward(self, x, denorm=False, return_rel_depth=False):
|
262 |
+
# print('input to midas:', x.shape)
|
263 |
+
with torch.no_grad():
|
264 |
+
if denorm:
|
265 |
+
x = denormalize(x)
|
266 |
+
x = self.prep(x)
|
267 |
+
|
268 |
+
with torch.set_grad_enabled(self.trainable):
|
269 |
+
|
270 |
+
rel_depth = self.core(x)
|
271 |
+
if not self.fetch_features:
|
272 |
+
return rel_depth
|
273 |
+
out = [self.core_out[k] for k in self.layer_names]
|
274 |
+
|
275 |
+
if return_rel_depth:
|
276 |
+
return rel_depth, out
|
277 |
+
return out
|
278 |
+
|
279 |
+
def get_rel_pos_params(self):
|
280 |
+
for name, p in self.core.pretrained.named_parameters():
|
281 |
+
if "pos_embed" in name:
|
282 |
+
yield p
|
283 |
+
|
284 |
+
def get_enc_params_except_rel_pos(self):
|
285 |
+
for name, p in self.core.pretrained.named_parameters():
|
286 |
+
if "pos_embed" not in name:
|
287 |
+
yield p
|
288 |
+
|
289 |
+
def freeze_encoder(self, freeze_rel_pos=False):
|
290 |
+
if freeze_rel_pos:
|
291 |
+
for p in self.core.pretrained.parameters():
|
292 |
+
p.requires_grad = False
|
293 |
+
else:
|
294 |
+
for p in self.get_enc_params_except_rel_pos():
|
295 |
+
p.requires_grad = False
|
296 |
+
return self
|
297 |
+
|
298 |
+
def attach_hooks(self, midas):
|
299 |
+
if len(self.handles) > 0:
|
300 |
+
self.remove_hooks()
|
301 |
+
if "out_conv" in self.layer_names:
|
302 |
+
self.handles.append(list(midas.depth_head.scratch.output_conv2.children())[
|
303 |
+
1].register_forward_hook(get_activation("out_conv", self.core_out)))
|
304 |
+
if "r4" in self.layer_names:
|
305 |
+
self.handles.append(midas.depth_head.scratch.refinenet4.register_forward_hook(
|
306 |
+
get_activation("r4", self.core_out)))
|
307 |
+
if "r3" in self.layer_names:
|
308 |
+
self.handles.append(midas.depth_head.scratch.refinenet3.register_forward_hook(
|
309 |
+
get_activation("r3", self.core_out)))
|
310 |
+
if "r2" in self.layer_names:
|
311 |
+
self.handles.append(midas.depth_head.scratch.refinenet2.register_forward_hook(
|
312 |
+
get_activation("r2", self.core_out)))
|
313 |
+
if "r1" in self.layer_names:
|
314 |
+
self.handles.append(midas.depth_head.scratch.refinenet1.register_forward_hook(
|
315 |
+
get_activation("r1", self.core_out)))
|
316 |
+
if "l4_rn" in self.layer_names:
|
317 |
+
self.handles.append(midas.depth_head.scratch.layer4_rn.register_forward_hook(
|
318 |
+
get_activation("l4_rn", self.core_out)))
|
319 |
+
|
320 |
+
return self
|
321 |
+
|
322 |
+
def remove_hooks(self):
|
323 |
+
for h in self.handles:
|
324 |
+
h.remove()
|
325 |
+
return self
|
326 |
+
|
327 |
+
def __del__(self):
|
328 |
+
self.remove_hooks()
|
329 |
+
|
330 |
+
def set_output_channels(self):
|
331 |
+
self.output_channels = [256, 256, 256, 256, 256]
|
332 |
+
|
333 |
+
@staticmethod
|
334 |
+
def build(midas_model_type="dinov2_large", train_midas=False, use_pretrained_midas=True, fetch_features=False, freeze_bn=True, force_keep_ar=False, force_reload=False, **kwargs):
|
335 |
+
if "img_size" in kwargs:
|
336 |
+
kwargs = DepthAnythingCore.parse_img_size(kwargs)
|
337 |
+
img_size = kwargs.pop("img_size", [384, 384])
|
338 |
+
|
339 |
+
depth_anything = DPT_DINOv2(out_channels=[256, 512, 1024, 1024], use_clstoken=False)
|
340 |
+
|
341 |
+
state_dict = torch.load('./checkpoints/depth_anything_vitl14.pth', map_location='cpu')
|
342 |
+
depth_anything.load_state_dict(state_dict)
|
343 |
+
|
344 |
+
kwargs.update({'keep_aspect_ratio': force_keep_ar})
|
345 |
+
|
346 |
+
depth_anything_core = DepthAnythingCore(depth_anything, trainable=train_midas, fetch_features=fetch_features,
|
347 |
+
freeze_bn=freeze_bn, img_size=img_size, **kwargs)
|
348 |
+
|
349 |
+
depth_anything_core.set_output_channels()
|
350 |
+
return depth_anything_core
|
351 |
+
|
352 |
+
@staticmethod
|
353 |
+
def parse_img_size(config):
|
354 |
+
assert 'img_size' in config
|
355 |
+
if isinstance(config['img_size'], str):
|
356 |
+
assert "," in config['img_size'], "img_size should be a string with comma separated img_size=H,W"
|
357 |
+
config['img_size'] = list(map(int, config['img_size'].split(",")))
|
358 |
+
assert len(
|
359 |
+
config['img_size']) == 2, "img_size should be a string with comma separated img_size=H,W"
|
360 |
+
elif isinstance(config['img_size'], int):
|
361 |
+
config['img_size'] = [config['img_size'], config['img_size']]
|
362 |
+
else:
|
363 |
+
assert isinstance(config['img_size'], list) and len(
|
364 |
+
config['img_size']) == 2, "img_size should be a list of H,W"
|
365 |
+
return config
|
366 |
+
|
367 |
+
|
368 |
+
nchannels2models = {
|
369 |
+
tuple([256]*5): ["DPT_BEiT_L_384", "DPT_BEiT_L_512", "DPT_BEiT_B_384", "DPT_SwinV2_L_384", "DPT_SwinV2_B_384", "DPT_SwinV2_T_256", "DPT_Large", "DPT_Hybrid"],
|
370 |
+
(512, 256, 128, 64, 64): ["MiDaS_small"]
|
371 |
+
}
|
372 |
+
|
373 |
+
# Model name to number of output channels
|
374 |
+
MIDAS_SETTINGS = {m: k for k, v in nchannels2models.items()
|
375 |
+
for m in v
|
376 |
+
}
|
metric_depth/zoedepth/models/base_models/dpt_dinov2/blocks.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch.nn as nn
|
2 |
+
|
3 |
+
|
4 |
+
def _make_scratch(in_shape, out_shape, groups=1, expand=False):
|
5 |
+
scratch = nn.Module()
|
6 |
+
|
7 |
+
out_shape1 = out_shape
|
8 |
+
out_shape2 = out_shape
|
9 |
+
out_shape3 = out_shape
|
10 |
+
if len(in_shape) >= 4:
|
11 |
+
out_shape4 = out_shape
|
12 |
+
|
13 |
+
if expand:
|
14 |
+
out_shape1 = out_shape
|
15 |
+
out_shape2 = out_shape*2
|
16 |
+
out_shape3 = out_shape*4
|
17 |
+
if len(in_shape) >= 4:
|
18 |
+
out_shape4 = out_shape*8
|
19 |
+
|
20 |
+
scratch.layer1_rn = nn.Conv2d(
|
21 |
+
in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
|
22 |
+
)
|
23 |
+
scratch.layer2_rn = nn.Conv2d(
|
24 |
+
in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
|
25 |
+
)
|
26 |
+
scratch.layer3_rn = nn.Conv2d(
|
27 |
+
in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
|
28 |
+
)
|
29 |
+
if len(in_shape) >= 4:
|
30 |
+
scratch.layer4_rn = nn.Conv2d(
|
31 |
+
in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
|
32 |
+
)
|
33 |
+
|
34 |
+
return scratch
|
35 |
+
|
36 |
+
|
37 |
+
class ResidualConvUnit(nn.Module):
|
38 |
+
"""Residual convolution module.
|
39 |
+
"""
|
40 |
+
|
41 |
+
def __init__(self, features, activation, bn):
|
42 |
+
"""Init.
|
43 |
+
|
44 |
+
Args:
|
45 |
+
features (int): number of features
|
46 |
+
"""
|
47 |
+
super().__init__()
|
48 |
+
|
49 |
+
self.bn = bn
|
50 |
+
|
51 |
+
self.groups=1
|
52 |
+
|
53 |
+
self.conv1 = nn.Conv2d(
|
54 |
+
features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
|
55 |
+
)
|
56 |
+
|
57 |
+
self.conv2 = nn.Conv2d(
|
58 |
+
features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
|
59 |
+
)
|
60 |
+
|
61 |
+
if self.bn==True:
|
62 |
+
self.bn1 = nn.BatchNorm2d(features)
|
63 |
+
self.bn2 = nn.BatchNorm2d(features)
|
64 |
+
|
65 |
+
self.activation = activation
|
66 |
+
|
67 |
+
self.skip_add = nn.quantized.FloatFunctional()
|
68 |
+
|
69 |
+
def forward(self, x):
|
70 |
+
"""Forward pass.
|
71 |
+
|
72 |
+
Args:
|
73 |
+
x (tensor): input
|
74 |
+
|
75 |
+
Returns:
|
76 |
+
tensor: output
|
77 |
+
"""
|
78 |
+
|
79 |
+
out = self.activation(x)
|
80 |
+
out = self.conv1(out)
|
81 |
+
if self.bn==True:
|
82 |
+
out = self.bn1(out)
|
83 |
+
|
84 |
+
out = self.activation(out)
|
85 |
+
out = self.conv2(out)
|
86 |
+
if self.bn==True:
|
87 |
+
out = self.bn2(out)
|
88 |
+
|
89 |
+
if self.groups > 1:
|
90 |
+
out = self.conv_merge(out)
|
91 |
+
|
92 |
+
return self.skip_add.add(out, x)
|
93 |
+
|
94 |
+
|
95 |
+
class FeatureFusionBlock(nn.Module):
|
96 |
+
"""Feature fusion block.
|
97 |
+
"""
|
98 |
+
|
99 |
+
def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None):
|
100 |
+
"""Init.
|
101 |
+
|
102 |
+
Args:
|
103 |
+
features (int): number of features
|
104 |
+
"""
|
105 |
+
super(FeatureFusionBlock, self).__init__()
|
106 |
+
|
107 |
+
self.deconv = deconv
|
108 |
+
self.align_corners = align_corners
|
109 |
+
|
110 |
+
self.groups=1
|
111 |
+
|
112 |
+
self.expand = expand
|
113 |
+
out_features = features
|
114 |
+
if self.expand==True:
|
115 |
+
out_features = features//2
|
116 |
+
|
117 |
+
self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
|
118 |
+
|
119 |
+
self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
|
120 |
+
self.resConfUnit2 = ResidualConvUnit(features, activation, bn)
|
121 |
+
|
122 |
+
self.skip_add = nn.quantized.FloatFunctional()
|
123 |
+
|
124 |
+
self.size=size
|
125 |
+
|
126 |
+
def forward(self, *xs, size=None):
|
127 |
+
"""Forward pass.
|
128 |
+
|
129 |
+
Returns:
|
130 |
+
tensor: output
|
131 |
+
"""
|
132 |
+
output = xs[0]
|
133 |
+
|
134 |
+
if len(xs) == 2:
|
135 |
+
res = self.resConfUnit1(xs[1])
|
136 |
+
output = self.skip_add.add(output, res)
|
137 |
+
|
138 |
+
output = self.resConfUnit2(output)
|
139 |
+
|
140 |
+
if (size is None) and (self.size is None):
|
141 |
+
modifier = {"scale_factor": 2}
|
142 |
+
elif size is None:
|
143 |
+
modifier = {"size": self.size}
|
144 |
+
else:
|
145 |
+
modifier = {"size": size}
|
146 |
+
|
147 |
+
output = nn.functional.interpolate(
|
148 |
+
output, **modifier, mode="bilinear", align_corners=self.align_corners
|
149 |
+
)
|
150 |
+
|
151 |
+
output = self.out_conv(output)
|
152 |
+
|
153 |
+
return output
|
metric_depth/zoedepth/models/base_models/dpt_dinov2/dpt.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
|
4 |
+
from .blocks import FeatureFusionBlock, _make_scratch
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
|
8 |
+
def _make_fusion_block(features, use_bn, size = None):
|
9 |
+
return FeatureFusionBlock(
|
10 |
+
features,
|
11 |
+
nn.ReLU(False),
|
12 |
+
deconv=False,
|
13 |
+
bn=use_bn,
|
14 |
+
expand=False,
|
15 |
+
align_corners=True,
|
16 |
+
size=size,
|
17 |
+
)
|
18 |
+
|
19 |
+
|
20 |
+
class DPTHead(nn.Module):
|
21 |
+
def __init__(self, in_channels, features=256, use_bn=False, out_channels=[256, 512, 1024, 1024], use_clstoken=False):
|
22 |
+
super(DPTHead, self).__init__()
|
23 |
+
|
24 |
+
self.use_clstoken = use_clstoken
|
25 |
+
|
26 |
+
# out_channels = [in_channels // 8, in_channels // 4, in_channels // 2, in_channels]
|
27 |
+
# out_channels = [in_channels // 4, in_channels // 2, in_channels, in_channels]
|
28 |
+
# out_channels = [in_channels, in_channels, in_channels, in_channels]
|
29 |
+
|
30 |
+
self.projects = nn.ModuleList([
|
31 |
+
nn.Conv2d(
|
32 |
+
in_channels=in_channels,
|
33 |
+
out_channels=out_channel,
|
34 |
+
kernel_size=1,
|
35 |
+
stride=1,
|
36 |
+
padding=0,
|
37 |
+
) for out_channel in out_channels
|
38 |
+
])
|
39 |
+
|
40 |
+
self.resize_layers = nn.ModuleList([
|
41 |
+
nn.ConvTranspose2d(
|
42 |
+
in_channels=out_channels[0],
|
43 |
+
out_channels=out_channels[0],
|
44 |
+
kernel_size=4,
|
45 |
+
stride=4,
|
46 |
+
padding=0),
|
47 |
+
nn.ConvTranspose2d(
|
48 |
+
in_channels=out_channels[1],
|
49 |
+
out_channels=out_channels[1],
|
50 |
+
kernel_size=2,
|
51 |
+
stride=2,
|
52 |
+
padding=0),
|
53 |
+
nn.Identity(),
|
54 |
+
nn.Conv2d(
|
55 |
+
in_channels=out_channels[3],
|
56 |
+
out_channels=out_channels[3],
|
57 |
+
kernel_size=3,
|
58 |
+
stride=2,
|
59 |
+
padding=1)
|
60 |
+
])
|
61 |
+
|
62 |
+
if use_clstoken:
|
63 |
+
self.readout_projects = nn.ModuleList()
|
64 |
+
for _ in range(len(self.projects)):
|
65 |
+
self.readout_projects.append(
|
66 |
+
nn.Sequential(
|
67 |
+
nn.Linear(2 * in_channels, in_channels),
|
68 |
+
nn.GELU()))
|
69 |
+
|
70 |
+
self.scratch = _make_scratch(
|
71 |
+
out_channels,
|
72 |
+
features,
|
73 |
+
groups=1,
|
74 |
+
expand=False,
|
75 |
+
)
|
76 |
+
|
77 |
+
self.scratch.stem_transpose = None
|
78 |
+
|
79 |
+
self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
|
80 |
+
self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
|
81 |
+
self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
|
82 |
+
self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
|
83 |
+
|
84 |
+
head_features_1 = features
|
85 |
+
head_features_2 = 32
|
86 |
+
|
87 |
+
self.scratch.output_conv1 = nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1)
|
88 |
+
|
89 |
+
self.scratch.output_conv2 = nn.Sequential(
|
90 |
+
nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
|
91 |
+
nn.ReLU(True),
|
92 |
+
nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
|
93 |
+
nn.ReLU(True),
|
94 |
+
nn.Identity(),
|
95 |
+
)
|
96 |
+
|
97 |
+
def forward(self, out_features, patch_h, patch_w):
|
98 |
+
out = []
|
99 |
+
for i, x in enumerate(out_features):
|
100 |
+
if self.use_clstoken:
|
101 |
+
x, cls_token = x[0], x[1]
|
102 |
+
readout = cls_token.unsqueeze(1).expand_as(x)
|
103 |
+
x = self.readout_projects[i](torch.cat((x, readout), -1))
|
104 |
+
else:
|
105 |
+
x = x[0]
|
106 |
+
|
107 |
+
x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))
|
108 |
+
|
109 |
+
x = self.projects[i](x)
|
110 |
+
x = self.resize_layers[i](x)
|
111 |
+
|
112 |
+
out.append(x)
|
113 |
+
|
114 |
+
layer_1, layer_2, layer_3, layer_4 = out
|
115 |
+
|
116 |
+
layer_1_rn = self.scratch.layer1_rn(layer_1)
|
117 |
+
layer_2_rn = self.scratch.layer2_rn(layer_2)
|
118 |
+
layer_3_rn = self.scratch.layer3_rn(layer_3)
|
119 |
+
layer_4_rn = self.scratch.layer4_rn(layer_4)
|
120 |
+
|
121 |
+
path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
|
122 |
+
path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
|
123 |
+
path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
|
124 |
+
path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
|
125 |
+
|
126 |
+
out = self.scratch.output_conv1(path_1)
|
127 |
+
out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
|
128 |
+
out = self.scratch.output_conv2(out)
|
129 |
+
|
130 |
+
return out
|
131 |
+
|
132 |
+
|
133 |
+
class DPT_DINOv2(nn.Module):
|
134 |
+
def __init__(self, encoder='vitl', features=256, use_bn=False, out_channels=[256, 512, 1024, 1024], use_clstoken=False):
|
135 |
+
|
136 |
+
super(DPT_DINOv2, self).__init__()
|
137 |
+
|
138 |
+
torch.manual_seed(1)
|
139 |
+
|
140 |
+
self.pretrained = torch.hub.load('../torchhub/facebookresearch_dinov2_main', 'dinov2_{:}14'.format(encoder), source='local', pretrained=False)
|
141 |
+
|
142 |
+
dim = self.pretrained.blocks[0].attn.qkv.in_features
|
143 |
+
|
144 |
+
self.depth_head = DPTHead(dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken)
|
145 |
+
|
146 |
+
def forward(self, x):
|
147 |
+
h, w = x.shape[-2:]
|
148 |
+
|
149 |
+
features = self.pretrained.get_intermediate_layers(x, 4, return_class_token=True)
|
150 |
+
|
151 |
+
patch_h, patch_w = h // 14, w // 14
|
152 |
+
|
153 |
+
depth = self.depth_head(features, patch_h, patch_w)
|
154 |
+
depth = F.interpolate(depth, size=(h, w), mode="bilinear", align_corners=True)
|
155 |
+
depth = F.relu(depth)
|
156 |
+
|
157 |
+
return depth.squeeze(1)
|
metric_depth/zoedepth/models/base_models/midas.py
ADDED
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import torch
|
26 |
+
import torch.nn as nn
|
27 |
+
import numpy as np
|
28 |
+
from torchvision.transforms import Normalize
|
29 |
+
|
30 |
+
|
31 |
+
def denormalize(x):
|
32 |
+
"""Reverses the imagenet normalization applied to the input.
|
33 |
+
|
34 |
+
Args:
|
35 |
+
x (torch.Tensor - shape(N,3,H,W)): input tensor
|
36 |
+
|
37 |
+
Returns:
|
38 |
+
torch.Tensor - shape(N,3,H,W): Denormalized input
|
39 |
+
"""
|
40 |
+
mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device)
|
41 |
+
std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device)
|
42 |
+
return x * std + mean
|
43 |
+
|
44 |
+
def get_activation(name, bank):
|
45 |
+
def hook(model, input, output):
|
46 |
+
bank[name] = output
|
47 |
+
return hook
|
48 |
+
|
49 |
+
|
50 |
+
class Resize(object):
|
51 |
+
"""Resize sample to given size (width, height).
|
52 |
+
"""
|
53 |
+
|
54 |
+
def __init__(
|
55 |
+
self,
|
56 |
+
width,
|
57 |
+
height,
|
58 |
+
resize_target=True,
|
59 |
+
keep_aspect_ratio=False,
|
60 |
+
ensure_multiple_of=1,
|
61 |
+
resize_method="lower_bound",
|
62 |
+
):
|
63 |
+
"""Init.
|
64 |
+
Args:
|
65 |
+
width (int): desired output width
|
66 |
+
height (int): desired output height
|
67 |
+
resize_target (bool, optional):
|
68 |
+
True: Resize the full sample (image, mask, target).
|
69 |
+
False: Resize image only.
|
70 |
+
Defaults to True.
|
71 |
+
keep_aspect_ratio (bool, optional):
|
72 |
+
True: Keep the aspect ratio of the input sample.
|
73 |
+
Output sample might not have the given width and height, and
|
74 |
+
resize behaviour depends on the parameter 'resize_method'.
|
75 |
+
Defaults to False.
|
76 |
+
ensure_multiple_of (int, optional):
|
77 |
+
Output width and height is constrained to be multiple of this parameter.
|
78 |
+
Defaults to 1.
|
79 |
+
resize_method (str, optional):
|
80 |
+
"lower_bound": Output will be at least as large as the given size.
|
81 |
+
"upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
|
82 |
+
"minimal": Scale as least as possible. (Output size might be smaller than given size.)
|
83 |
+
Defaults to "lower_bound".
|
84 |
+
"""
|
85 |
+
print("Params passed to Resize transform:")
|
86 |
+
print("\twidth: ", width)
|
87 |
+
print("\theight: ", height)
|
88 |
+
print("\tresize_target: ", resize_target)
|
89 |
+
print("\tkeep_aspect_ratio: ", keep_aspect_ratio)
|
90 |
+
print("\tensure_multiple_of: ", ensure_multiple_of)
|
91 |
+
print("\tresize_method: ", resize_method)
|
92 |
+
|
93 |
+
self.__width = width
|
94 |
+
self.__height = height
|
95 |
+
|
96 |
+
self.__keep_aspect_ratio = keep_aspect_ratio
|
97 |
+
self.__multiple_of = ensure_multiple_of
|
98 |
+
self.__resize_method = resize_method
|
99 |
+
|
100 |
+
def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
|
101 |
+
y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
|
102 |
+
|
103 |
+
if max_val is not None and y > max_val:
|
104 |
+
y = (np.floor(x / self.__multiple_of)
|
105 |
+
* self.__multiple_of).astype(int)
|
106 |
+
|
107 |
+
if y < min_val:
|
108 |
+
y = (np.ceil(x / self.__multiple_of)
|
109 |
+
* self.__multiple_of).astype(int)
|
110 |
+
|
111 |
+
return y
|
112 |
+
|
113 |
+
def get_size(self, width, height):
|
114 |
+
# determine new height and width
|
115 |
+
scale_height = self.__height / height
|
116 |
+
scale_width = self.__width / width
|
117 |
+
|
118 |
+
if self.__keep_aspect_ratio:
|
119 |
+
if self.__resize_method == "lower_bound":
|
120 |
+
# scale such that output size is lower bound
|
121 |
+
if scale_width > scale_height:
|
122 |
+
# fit width
|
123 |
+
scale_height = scale_width
|
124 |
+
else:
|
125 |
+
# fit height
|
126 |
+
scale_width = scale_height
|
127 |
+
elif self.__resize_method == "upper_bound":
|
128 |
+
# scale such that output size is upper bound
|
129 |
+
if scale_width < scale_height:
|
130 |
+
# fit width
|
131 |
+
scale_height = scale_width
|
132 |
+
else:
|
133 |
+
# fit height
|
134 |
+
scale_width = scale_height
|
135 |
+
elif self.__resize_method == "minimal":
|
136 |
+
# scale as least as possbile
|
137 |
+
if abs(1 - scale_width) < abs(1 - scale_height):
|
138 |
+
# fit width
|
139 |
+
scale_height = scale_width
|
140 |
+
else:
|
141 |
+
# fit height
|
142 |
+
scale_width = scale_height
|
143 |
+
else:
|
144 |
+
raise ValueError(
|
145 |
+
f"resize_method {self.__resize_method} not implemented"
|
146 |
+
)
|
147 |
+
|
148 |
+
if self.__resize_method == "lower_bound":
|
149 |
+
new_height = self.constrain_to_multiple_of(
|
150 |
+
scale_height * height, min_val=self.__height
|
151 |
+
)
|
152 |
+
new_width = self.constrain_to_multiple_of(
|
153 |
+
scale_width * width, min_val=self.__width
|
154 |
+
)
|
155 |
+
elif self.__resize_method == "upper_bound":
|
156 |
+
new_height = self.constrain_to_multiple_of(
|
157 |
+
scale_height * height, max_val=self.__height
|
158 |
+
)
|
159 |
+
new_width = self.constrain_to_multiple_of(
|
160 |
+
scale_width * width, max_val=self.__width
|
161 |
+
)
|
162 |
+
elif self.__resize_method == "minimal":
|
163 |
+
new_height = self.constrain_to_multiple_of(scale_height * height)
|
164 |
+
new_width = self.constrain_to_multiple_of(scale_width * width)
|
165 |
+
else:
|
166 |
+
raise ValueError(
|
167 |
+
f"resize_method {self.__resize_method} not implemented")
|
168 |
+
|
169 |
+
return (new_width, new_height)
|
170 |
+
|
171 |
+
def __call__(self, x):
|
172 |
+
width, height = self.get_size(*x.shape[-2:][::-1])
|
173 |
+
return nn.functional.interpolate(x, (height, width), mode='bilinear', align_corners=True)
|
174 |
+
|
175 |
+
class PrepForMidas(object):
|
176 |
+
def __init__(self, resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True):
|
177 |
+
if isinstance(img_size, int):
|
178 |
+
img_size = (img_size, img_size)
|
179 |
+
net_h, net_w = img_size
|
180 |
+
self.normalization = Normalize(
|
181 |
+
mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
|
182 |
+
self.resizer = Resize(net_w, net_h, keep_aspect_ratio=keep_aspect_ratio, ensure_multiple_of=32, resize_method=resize_mode) \
|
183 |
+
if do_resize else nn.Identity()
|
184 |
+
|
185 |
+
def __call__(self, x):
|
186 |
+
return self.normalization(self.resizer(x))
|
187 |
+
|
188 |
+
|
189 |
+
class MidasCore(nn.Module):
|
190 |
+
def __init__(self, midas, trainable=False, fetch_features=True, layer_names=('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'), freeze_bn=False, keep_aspect_ratio=True,
|
191 |
+
img_size=384, **kwargs):
|
192 |
+
"""Midas Base model used for multi-scale feature extraction.
|
193 |
+
|
194 |
+
Args:
|
195 |
+
midas (torch.nn.Module): Midas model.
|
196 |
+
trainable (bool, optional): Train midas model. Defaults to False.
|
197 |
+
fetch_features (bool, optional): Extract multi-scale features. Defaults to True.
|
198 |
+
layer_names (tuple, optional): Layers used for feature extraction. Order = (head output features, last layer features, ...decoder features). Defaults to ('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1').
|
199 |
+
freeze_bn (bool, optional): Freeze BatchNorm. Generally results in better finetuning performance. Defaults to False.
|
200 |
+
keep_aspect_ratio (bool, optional): Keep the aspect ratio of input images while resizing. Defaults to True.
|
201 |
+
img_size (int, tuple, optional): Input resolution. Defaults to 384.
|
202 |
+
"""
|
203 |
+
super().__init__()
|
204 |
+
self.core = midas
|
205 |
+
self.output_channels = None
|
206 |
+
self.core_out = {}
|
207 |
+
self.trainable = trainable
|
208 |
+
self.fetch_features = fetch_features
|
209 |
+
# midas.scratch.output_conv = nn.Identity()
|
210 |
+
self.handles = []
|
211 |
+
# self.layer_names = ['out_conv','l4_rn', 'r4', 'r3', 'r2', 'r1']
|
212 |
+
self.layer_names = layer_names
|
213 |
+
|
214 |
+
self.set_trainable(trainable)
|
215 |
+
self.set_fetch_features(fetch_features)
|
216 |
+
|
217 |
+
self.prep = PrepForMidas(keep_aspect_ratio=keep_aspect_ratio,
|
218 |
+
img_size=img_size, do_resize=kwargs.get('do_resize', True))
|
219 |
+
|
220 |
+
if freeze_bn:
|
221 |
+
self.freeze_bn()
|
222 |
+
|
223 |
+
def set_trainable(self, trainable):
|
224 |
+
self.trainable = trainable
|
225 |
+
if trainable:
|
226 |
+
self.unfreeze()
|
227 |
+
else:
|
228 |
+
self.freeze()
|
229 |
+
return self
|
230 |
+
|
231 |
+
def set_fetch_features(self, fetch_features):
|
232 |
+
self.fetch_features = fetch_features
|
233 |
+
if fetch_features:
|
234 |
+
if len(self.handles) == 0:
|
235 |
+
self.attach_hooks(self.core)
|
236 |
+
else:
|
237 |
+
self.remove_hooks()
|
238 |
+
return self
|
239 |
+
|
240 |
+
def freeze(self):
|
241 |
+
for p in self.parameters():
|
242 |
+
p.requires_grad = False
|
243 |
+
self.trainable = False
|
244 |
+
return self
|
245 |
+
|
246 |
+
def unfreeze(self):
|
247 |
+
for p in self.parameters():
|
248 |
+
p.requires_grad = True
|
249 |
+
self.trainable = True
|
250 |
+
return self
|
251 |
+
|
252 |
+
def freeze_bn(self):
|
253 |
+
for m in self.modules():
|
254 |
+
if isinstance(m, nn.BatchNorm2d):
|
255 |
+
m.eval()
|
256 |
+
return self
|
257 |
+
|
258 |
+
def forward(self, x, denorm=False, return_rel_depth=False):
|
259 |
+
# print('input to midas:', x.shape)
|
260 |
+
with torch.no_grad():
|
261 |
+
if denorm:
|
262 |
+
x = denormalize(x)
|
263 |
+
x = self.prep(x)
|
264 |
+
# print("Shape after prep: ", x.shape)
|
265 |
+
# print('pre-processed:', x.shape)
|
266 |
+
|
267 |
+
with torch.set_grad_enabled(self.trainable):
|
268 |
+
|
269 |
+
# print("Input size to Midascore", x.shape)
|
270 |
+
rel_depth = self.core(x)
|
271 |
+
# print("Output from midas shape", rel_depth.shape)
|
272 |
+
if not self.fetch_features:
|
273 |
+
return rel_depth
|
274 |
+
out = [self.core_out[k] for k in self.layer_names]
|
275 |
+
|
276 |
+
if return_rel_depth:
|
277 |
+
return rel_depth, out
|
278 |
+
return out
|
279 |
+
|
280 |
+
def get_rel_pos_params(self):
|
281 |
+
for name, p in self.core.pretrained.named_parameters():
|
282 |
+
if "relative_position" in name:
|
283 |
+
yield p
|
284 |
+
|
285 |
+
def get_enc_params_except_rel_pos(self):
|
286 |
+
for name, p in self.core.pretrained.named_parameters():
|
287 |
+
if "relative_position" not in name:
|
288 |
+
yield p
|
289 |
+
|
290 |
+
def freeze_encoder(self, freeze_rel_pos=False):
|
291 |
+
if freeze_rel_pos:
|
292 |
+
for p in self.core.pretrained.parameters():
|
293 |
+
p.requires_grad = False
|
294 |
+
else:
|
295 |
+
for p in self.get_enc_params_except_rel_pos():
|
296 |
+
p.requires_grad = False
|
297 |
+
return self
|
298 |
+
|
299 |
+
def attach_hooks(self, midas):
|
300 |
+
if len(self.handles) > 0:
|
301 |
+
self.remove_hooks()
|
302 |
+
if "out_conv" in self.layer_names:
|
303 |
+
self.handles.append(list(midas.scratch.output_conv.children())[
|
304 |
+
3].register_forward_hook(get_activation("out_conv", self.core_out)))
|
305 |
+
if "r4" in self.layer_names:
|
306 |
+
self.handles.append(midas.scratch.refinenet4.register_forward_hook(
|
307 |
+
get_activation("r4", self.core_out)))
|
308 |
+
if "r3" in self.layer_names:
|
309 |
+
self.handles.append(midas.scratch.refinenet3.register_forward_hook(
|
310 |
+
get_activation("r3", self.core_out)))
|
311 |
+
if "r2" in self.layer_names:
|
312 |
+
self.handles.append(midas.scratch.refinenet2.register_forward_hook(
|
313 |
+
get_activation("r2", self.core_out)))
|
314 |
+
if "r1" in self.layer_names:
|
315 |
+
self.handles.append(midas.scratch.refinenet1.register_forward_hook(
|
316 |
+
get_activation("r1", self.core_out)))
|
317 |
+
if "l4_rn" in self.layer_names:
|
318 |
+
self.handles.append(midas.scratch.layer4_rn.register_forward_hook(
|
319 |
+
get_activation("l4_rn", self.core_out)))
|
320 |
+
|
321 |
+
return self
|
322 |
+
|
323 |
+
def remove_hooks(self):
|
324 |
+
for h in self.handles:
|
325 |
+
h.remove()
|
326 |
+
return self
|
327 |
+
|
328 |
+
def __del__(self):
|
329 |
+
self.remove_hooks()
|
330 |
+
|
331 |
+
def set_output_channels(self, model_type):
|
332 |
+
self.output_channels = MIDAS_SETTINGS[model_type]
|
333 |
+
|
334 |
+
@staticmethod
|
335 |
+
def build(midas_model_type="DPT_BEiT_L_384", train_midas=False, use_pretrained_midas=True, fetch_features=False, freeze_bn=True, force_keep_ar=False, force_reload=False, **kwargs):
|
336 |
+
if midas_model_type not in MIDAS_SETTINGS:
|
337 |
+
raise ValueError(
|
338 |
+
f"Invalid model type: {midas_model_type}. Must be one of {list(MIDAS_SETTINGS.keys())}")
|
339 |
+
if "img_size" in kwargs:
|
340 |
+
kwargs = MidasCore.parse_img_size(kwargs)
|
341 |
+
img_size = kwargs.pop("img_size", [384, 384])
|
342 |
+
# print("img_size", img_size)
|
343 |
+
midas = torch.hub.load("intel-isl/MiDaS", midas_model_type,
|
344 |
+
pretrained=use_pretrained_midas, force_reload=force_reload)
|
345 |
+
kwargs.update({'keep_aspect_ratio': force_keep_ar})
|
346 |
+
midas_core = MidasCore(midas, trainable=train_midas, fetch_features=fetch_features,
|
347 |
+
freeze_bn=freeze_bn, img_size=img_size, **kwargs)
|
348 |
+
midas_core.set_output_channels(midas_model_type)
|
349 |
+
return midas_core
|
350 |
+
|
351 |
+
@staticmethod
|
352 |
+
def build_from_config(config):
|
353 |
+
return MidasCore.build(**config)
|
354 |
+
|
355 |
+
@staticmethod
|
356 |
+
def parse_img_size(config):
|
357 |
+
assert 'img_size' in config
|
358 |
+
if isinstance(config['img_size'], str):
|
359 |
+
assert "," in config['img_size'], "img_size should be a string with comma separated img_size=H,W"
|
360 |
+
config['img_size'] = list(map(int, config['img_size'].split(",")))
|
361 |
+
assert len(
|
362 |
+
config['img_size']) == 2, "img_size should be a string with comma separated img_size=H,W"
|
363 |
+
elif isinstance(config['img_size'], int):
|
364 |
+
config['img_size'] = [config['img_size'], config['img_size']]
|
365 |
+
else:
|
366 |
+
assert isinstance(config['img_size'], list) and len(
|
367 |
+
config['img_size']) == 2, "img_size should be a list of H,W"
|
368 |
+
return config
|
369 |
+
|
370 |
+
|
371 |
+
nchannels2models = {
|
372 |
+
tuple([256]*5): ["DPT_BEiT_L_384", "DPT_BEiT_L_512", "DPT_BEiT_B_384", "DPT_SwinV2_L_384", "DPT_SwinV2_B_384", "DPT_SwinV2_T_256", "DPT_Large", "DPT_Hybrid"],
|
373 |
+
(512, 256, 128, 64, 64): ["MiDaS_small"]
|
374 |
+
}
|
375 |
+
|
376 |
+
# Model name to number of output channels
|
377 |
+
MIDAS_SETTINGS = {m: k for k, v in nchannels2models.items()
|
378 |
+
for m in v
|
379 |
+
}
|
380 |
+
# print('MIDAS_SETTINGS:', MIDAS_SETTINGS)
|
metric_depth/zoedepth/models/builder.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
from importlib import import_module
|
26 |
+
from zoedepth.models.depth_model import DepthModel
|
27 |
+
|
28 |
+
def build_model(config) -> DepthModel:
|
29 |
+
"""Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface.
|
30 |
+
This function should be used to construct models for training and evaluation.
|
31 |
+
|
32 |
+
Args:
|
33 |
+
config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder.
|
34 |
+
|
35 |
+
Returns:
|
36 |
+
torch.nn.Module: Model corresponding to name and version as specified in config
|
37 |
+
"""
|
38 |
+
module_name = f"zoedepth.models.{config.model}"
|
39 |
+
try:
|
40 |
+
module = import_module(module_name)
|
41 |
+
except ModuleNotFoundError as e:
|
42 |
+
# print the original error message
|
43 |
+
print(e)
|
44 |
+
raise ValueError(
|
45 |
+
f"Model {config.model} not found. Refer above error for details.") from e
|
46 |
+
try:
|
47 |
+
get_version = getattr(module, "get_version")
|
48 |
+
except AttributeError as e:
|
49 |
+
raise ValueError(
|
50 |
+
f"Model {config.model} has no get_version function.") from e
|
51 |
+
return get_version(config.version_name).build_from_config(config)
|
metric_depth/zoedepth/models/depth_model.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import numpy as np
|
26 |
+
import torch
|
27 |
+
import torch.nn as nn
|
28 |
+
import torch.nn.functional as F
|
29 |
+
from torchvision import transforms
|
30 |
+
import PIL.Image
|
31 |
+
from PIL import Image
|
32 |
+
from typing import Union
|
33 |
+
|
34 |
+
|
35 |
+
class DepthModel(nn.Module):
|
36 |
+
def __init__(self):
|
37 |
+
super().__init__()
|
38 |
+
self.device = 'cpu'
|
39 |
+
|
40 |
+
def to(self, device) -> nn.Module:
|
41 |
+
self.device = device
|
42 |
+
return super().to(device)
|
43 |
+
|
44 |
+
def forward(self, x, *args, **kwargs):
|
45 |
+
raise NotImplementedError
|
46 |
+
|
47 |
+
def _infer(self, x: torch.Tensor):
|
48 |
+
"""
|
49 |
+
Inference interface for the model
|
50 |
+
Args:
|
51 |
+
x (torch.Tensor): input tensor of shape (b, c, h, w)
|
52 |
+
Returns:
|
53 |
+
torch.Tensor: output tensor of shape (b, 1, h, w)
|
54 |
+
"""
|
55 |
+
return self(x)['metric_depth']
|
56 |
+
|
57 |
+
def _infer_with_pad_aug(self, x: torch.Tensor, pad_input: bool=True, fh: float=3, fw: float=3, upsampling_mode: str='bicubic', padding_mode="reflect", **kwargs) -> torch.Tensor:
|
58 |
+
"""
|
59 |
+
Inference interface for the model with padding augmentation
|
60 |
+
Padding augmentation fixes the boundary artifacts in the output depth map.
|
61 |
+
Boundary artifacts are sometimes caused by the fact that the model is trained on NYU raw dataset which has a black or white border around the image.
|
62 |
+
This augmentation pads the input image and crops the prediction back to the original size / view.
|
63 |
+
|
64 |
+
Note: This augmentation is not required for the models trained with 'avoid_boundary'=True.
|
65 |
+
Args:
|
66 |
+
x (torch.Tensor): input tensor of shape (b, c, h, w)
|
67 |
+
pad_input (bool, optional): whether to pad the input or not. Defaults to True.
|
68 |
+
fh (float, optional): height padding factor. The padding is calculated as sqrt(h/2) * fh. Defaults to 3.
|
69 |
+
fw (float, optional): width padding factor. The padding is calculated as sqrt(w/2) * fw. Defaults to 3.
|
70 |
+
upsampling_mode (str, optional): upsampling mode. Defaults to 'bicubic'.
|
71 |
+
padding_mode (str, optional): padding mode. Defaults to "reflect".
|
72 |
+
Returns:
|
73 |
+
torch.Tensor: output tensor of shape (b, 1, h, w)
|
74 |
+
"""
|
75 |
+
# assert x is nchw and c = 3
|
76 |
+
assert x.dim() == 4, "x must be 4 dimensional, got {}".format(x.dim())
|
77 |
+
assert x.shape[1] == 3, "x must have 3 channels, got {}".format(x.shape[1])
|
78 |
+
|
79 |
+
if pad_input:
|
80 |
+
assert fh > 0 or fw > 0, "atlease one of fh and fw must be greater than 0"
|
81 |
+
pad_h = int(np.sqrt(x.shape[2]/2) * fh)
|
82 |
+
pad_w = int(np.sqrt(x.shape[3]/2) * fw)
|
83 |
+
padding = [pad_w, pad_w]
|
84 |
+
if pad_h > 0:
|
85 |
+
padding += [pad_h, pad_h]
|
86 |
+
|
87 |
+
x = F.pad(x, padding, mode=padding_mode, **kwargs)
|
88 |
+
out = self._infer(x)
|
89 |
+
if out.shape[-2:] != x.shape[-2:]:
|
90 |
+
out = F.interpolate(out, size=(x.shape[2], x.shape[3]), mode=upsampling_mode, align_corners=False)
|
91 |
+
if pad_input:
|
92 |
+
# crop to the original size, handling the case where pad_h and pad_w is 0
|
93 |
+
if pad_h > 0:
|
94 |
+
out = out[:, :, pad_h:-pad_h,:]
|
95 |
+
if pad_w > 0:
|
96 |
+
out = out[:, :, :, pad_w:-pad_w]
|
97 |
+
return out
|
98 |
+
|
99 |
+
def infer_with_flip_aug(self, x, pad_input: bool=True, **kwargs) -> torch.Tensor:
|
100 |
+
"""
|
101 |
+
Inference interface for the model with horizontal flip augmentation
|
102 |
+
Horizontal flip augmentation improves the accuracy of the model by averaging the output of the model with and without horizontal flip.
|
103 |
+
Args:
|
104 |
+
x (torch.Tensor): input tensor of shape (b, c, h, w)
|
105 |
+
pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
|
106 |
+
Returns:
|
107 |
+
torch.Tensor: output tensor of shape (b, 1, h, w)
|
108 |
+
"""
|
109 |
+
# infer with horizontal flip and average
|
110 |
+
out = self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
|
111 |
+
out_flip = self._infer_with_pad_aug(torch.flip(x, dims=[3]), pad_input=pad_input, **kwargs)
|
112 |
+
out = (out + torch.flip(out_flip, dims=[3])) / 2
|
113 |
+
return out
|
114 |
+
|
115 |
+
def infer(self, x, pad_input: bool=True, with_flip_aug: bool=True, **kwargs) -> torch.Tensor:
|
116 |
+
"""
|
117 |
+
Inference interface for the model
|
118 |
+
Args:
|
119 |
+
x (torch.Tensor): input tensor of shape (b, c, h, w)
|
120 |
+
pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
|
121 |
+
with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
|
122 |
+
Returns:
|
123 |
+
torch.Tensor: output tensor of shape (b, 1, h, w)
|
124 |
+
"""
|
125 |
+
if with_flip_aug:
|
126 |
+
return self.infer_with_flip_aug(x, pad_input=pad_input, **kwargs)
|
127 |
+
else:
|
128 |
+
return self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
|
129 |
+
|
130 |
+
@torch.no_grad()
|
131 |
+
def infer_pil(self, pil_img, pad_input: bool=True, with_flip_aug: bool=True, output_type: str="numpy", **kwargs) -> Union[np.ndarray, PIL.Image.Image, torch.Tensor]:
|
132 |
+
"""
|
133 |
+
Inference interface for the model for PIL image
|
134 |
+
Args:
|
135 |
+
pil_img (PIL.Image.Image): input PIL image
|
136 |
+
pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
|
137 |
+
with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
|
138 |
+
output_type (str, optional): output type. Supported values are 'numpy', 'pil' and 'tensor'. Defaults to "numpy".
|
139 |
+
"""
|
140 |
+
x = transforms.ToTensor()(pil_img).unsqueeze(0).to(self.device)
|
141 |
+
out_tensor = self.infer(x, pad_input=pad_input, with_flip_aug=with_flip_aug, **kwargs)
|
142 |
+
if output_type == "numpy":
|
143 |
+
return out_tensor.squeeze().cpu().numpy()
|
144 |
+
elif output_type == "pil":
|
145 |
+
# uint16 is required for depth pil image
|
146 |
+
out_16bit_numpy = (out_tensor.squeeze().cpu().numpy()*256).astype(np.uint16)
|
147 |
+
return Image.fromarray(out_16bit_numpy)
|
148 |
+
elif output_type == "tensor":
|
149 |
+
return out_tensor.squeeze().cpu()
|
150 |
+
else:
|
151 |
+
raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'")
|
152 |
+
|
metric_depth/zoedepth/models/layers/attractor.py
ADDED
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MIT License
|
2 |
+
|
3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
4 |
+
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
# File author: Shariq Farooq Bhat
|
24 |
+
|
25 |
+
import torch
|
26 |
+
import torch.nn as nn
|
27 |
+
|
28 |
+
|
29 |
+
@torch.jit.script
|
30 |
+
def exp_attractor(dx, alpha: float = 300, gamma: int = 2):
|
31 |
+
"""Exponential attractor: dc = exp(-alpha*|dx|^gamma) * dx , where dx = a - c, a = attractor point, c = bin center, dc = shift in bin centermmary for exp_attractor
|
32 |
+
|
33 |
+
Args:
|
34 |
+
dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
|
35 |
+
alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
|
36 |
+
gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.
|
37 |
+
|
38 |
+
Returns:
|
39 |
+
torch.Tensor : Delta shifts - dc; New bin centers = Old bin centers + dc
|
40 |
+
"""
|
41 |
+
return torch.exp(-alpha*(torch.abs(dx)**gamma)) * (dx)
|
42 |
+
|
43 |
+
|
44 |
+
@torch.jit.script
|
45 |
+
def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
|
46 |
+
"""Inverse attractor: dc = dx / (1 + alpha*dx^gamma), where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center
|
47 |
+
This is the default one according to the accompanying paper.
|
48 |
+
|
49 |
+
Args:
|
50 |
+
dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
|
51 |
+
alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
|
52 |
+
gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.
|
53 |
+
|
54 |
+
Returns:
|
55 |
+
torch.Tensor: Delta shifts - dc; New bin centers = Old bin centers + dc
|
56 |
+
"""
|
57 |
+
return dx.div(1+alpha*dx.pow(gamma))
|
58 |
+
|
59 |
+
|
60 |
+
class AttractorLayer(nn.Module):
|
61 |
+
def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
|
62 |
+
alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
|
63 |
+
"""
|
64 |
+
Attractor layer for bin centers. Bin centers are bounded on the interval (min_depth, max_depth)
|
65 |
+
"""
|
66 |
+
super().__init__()
|
67 |
+
|
68 |
+
self.n_attractors = n_attractors
|
69 |
+
self.n_bins = n_bins
|
70 |
+
self.min_depth = min_depth
|
71 |
+
self.max_depth = max_depth
|
72 |
+
self.alpha = alpha
|
73 |
+
self.gamma = gamma
|
74 |
+
self.kind = kind
|
75 |
+
self.attractor_type = attractor_type
|
76 |
+
self.memory_efficient = memory_efficient
|
77 |
+
|
78 |
+
self._net = nn.Sequential(
|
79 |
+
nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
|
80 |
+
nn.ReLU(inplace=True),
|
81 |
+
nn.Conv2d(mlp_dim, n_attractors*2, 1, 1, 0), # x2 for linear norm
|
82 |
+
nn.ReLU(inplace=True)
|
83 |
+
)
|
84 |
+
|
85 |
+
def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
|
86 |
+
"""
|
87 |
+
Args:
|
88 |
+
x (torch.Tensor) : feature block; shape - n, c, h, w
|
89 |
+
b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
|
90 |
+
|
91 |
+
Returns:
|
92 |
+
tuple(torch.Tensor,torch.Tensor) : new bin centers normed and scaled; shape - n, nbins, h, w
|
93 |
+
"""
|
94 |
+
if prev_b_embedding is not None:
|
95 |
+
if interpolate:
|
96 |
+
prev_b_embedding = nn.functional.interpolate(
|
97 |
+
prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
|
98 |
+
x = x + prev_b_embedding
|
99 |
+
|
100 |
+
A = self._net(x)
|
101 |
+
eps = 1e-3
|
102 |
+
A = A + eps
|
103 |
+
n, c, h, w = A.shape
|
104 |
+
A = A.view(n, self.n_attractors, 2, h, w)
|
105 |
+
A_normed = A / A.sum(dim=2, keepdim=True) # n, a, 2, h, w
|
106 |
+
A_normed = A[:, :, 0, ...] # n, na, h, w
|
107 |
+
|
108 |
+
b_prev = nn.functional.interpolate(
|
109 |
+
b_prev, (h, w), mode='bilinear', align_corners=True)
|
110 |
+
b_centers = b_prev
|
111 |
+
|
112 |
+
if self.attractor_type == 'exp':
|
113 |
+
dist = exp_attractor
|
114 |
+
else:
|
115 |
+
dist = inv_attractor
|
116 |
+
|
117 |
+
if not self.memory_efficient:
|
118 |
            func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
            # .shape N, nbins, h, w
            delta_c = func(dist(A_normed.unsqueeze(
                2) - b_centers.unsqueeze(1)), dim=1)
        else:
            delta_c = torch.zeros_like(b_centers, device=b_centers.device)
            for i in range(self.n_attractors):
                # .shape N, nbins, h, w
                delta_c += dist(A_normed[:, i, ...].unsqueeze(1) - b_centers)

            if self.kind == 'mean':
                delta_c = delta_c / self.n_attractors

        b_new_centers = b_centers + delta_c
        B_centers = (self.max_depth - self.min_depth) * \
            b_new_centers + self.min_depth
        B_centers, _ = torch.sort(B_centers, dim=1)
        B_centers = torch.clip(B_centers, self.min_depth, self.max_depth)
        return b_new_centers, B_centers


class AttractorLayerUnnormed(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer for bin centers. Bin centers are unbounded
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_attractors, 1, 1, 0),
            nn.Softplus()
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w

        Returns:
            tuple(torch.Tensor,torch.Tensor) : new bin centers unbounded; shape - n, nbins, h, w. Two outputs just to keep the API consistent with the normed version
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        A = self._net(x)
        n, c, h, w = A.shape

        b_prev = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)
        b_centers = b_prev

        if self.attractor_type == 'exp':
            dist = exp_attractor
        else:
            dist = inv_attractor

        if not self.memory_efficient:
            func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
            # .shape N, nbins, h, w
            delta_c = func(
                dist(A.unsqueeze(2) - b_centers.unsqueeze(1)), dim=1)
        else:
            delta_c = torch.zeros_like(b_centers, device=b_centers.device)
            for i in range(self.n_attractors):
                delta_c += dist(A[:, i, ...].unsqueeze(1) -
                                b_centers)  # .shape N, nbins, h, w

            if self.kind == 'mean':
                delta_c = delta_c / self.n_attractors

        b_new_centers = b_centers + delta_c
        B_centers = b_new_centers

        return b_new_centers, B_centers
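A minimal usage sketch (not part of the upload) for the attractor layer above: it pushes random tensors through AttractorLayerUnnormed and checks the output shapes. The import path assumes the script runs from metric_depth/ so the zoedepth package resolves, and the layer relies on exp_attractor / inv_attractor defined earlier in this same file; all channel counts and sizes are illustrative.

# Hedged sketch, not part of the repository; shapes below are made up.
import torch
from zoedepth.models.layers.attractor import AttractorLayerUnnormed

layer = AttractorLayerUnnormed(in_features=64, n_bins=16, n_attractors=8,
                               min_depth=1e-3, max_depth=10)
x = torch.randn(2, 64, 24, 32)            # feature block, N C H W
b_prev = torch.rand(2, 16, 24, 32) * 10   # previous (unbounded) bin centers

b_new, B_centers = layer(x, b_prev)       # attractors shift each pixel's bin centers
print(b_new.shape, B_centers.shape)       # both torch.Size([2, 16, 24, 32])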
metric_depth/zoedepth/models/layers/dist_layers.py
ADDED
@@ -0,0 +1,121 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
import torch.nn as nn


def log_binom(n, k, eps=1e-7):
    """ log(nCk) using stirling approximation """
    n = n + eps
    k = k + eps
    return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps)


class LogBinomial(nn.Module):
    def __init__(self, n_classes=256, act=torch.softmax):
        """Compute log binomial distribution for n_classes

        Args:
            n_classes (int, optional): number of output classes. Defaults to 256.
        """
        super().__init__()
        self.K = n_classes
        self.act = act
        self.register_buffer('k_idx', torch.arange(
            0, n_classes).view(1, -1, 1, 1))
        self.register_buffer('K_minus_1', torch.Tensor(
            [self.K-1]).view(1, -1, 1, 1))

    def forward(self, x, t=1., eps=1e-4):
        """Compute log binomial distribution for x

        Args:
            x (torch.Tensor - NCHW): probabilities
            t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1.
            eps (float, optional): Small number for numerical stability. Defaults to 1e-4.

        Returns:
            torch.Tensor - NCHW: log binomial distribution logbinomial(p;t)
        """
        if x.ndim == 3:
            x = x.unsqueeze(1)  # make it nchw

        one_minus_x = torch.clamp(1 - x, eps, 1)
        x = torch.clamp(x, eps, 1)
        y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \
            torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x)
        return self.act(y/t, dim=1)


class ConditionalLogBinomial(nn.Module):
    def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax):
        """Conditional Log Binomial distribution

        Args:
            in_features (int): number of input channels in main feature
            condition_dim (int): number of input channels in condition feature
            n_classes (int, optional): Number of classes. Defaults to 256.
            bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2.
            p_eps (float, optional): small eps value. Defaults to 1e-4.
            max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50.
            min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7.
        """
        super().__init__()
        self.p_eps = p_eps
        self.max_temp = max_temp
        self.min_temp = min_temp
        self.log_binomial_transform = LogBinomial(n_classes, act=act)
        bottleneck = (in_features + condition_dim) // bottleneck_factor
        self.mlp = nn.Sequential(
            nn.Conv2d(in_features + condition_dim, bottleneck,
                      kernel_size=1, stride=1, padding=0),
            nn.GELU(),
            # 2 for p linear norm, 2 for t linear norm
            nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0),
            nn.Softplus()
        )

    def forward(self, x, cond):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Main feature
            cond (torch.Tensor - NCHW): condition feature

        Returns:
            torch.Tensor: Output log binomial distribution
        """
        pt = self.mlp(torch.concat((x, cond), dim=1))
        p, t = pt[:, :2, ...], pt[:, 2:, ...]

        p = p + self.p_eps
        p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...])

        t = t + self.p_eps
        t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...])
        t = t.unsqueeze(1)
        t = (self.max_temp - self.min_temp) * t + self.min_temp

        return self.log_binomial_transform(p, t)
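A quick sketch (not part of the upload) of how the head above can be exercised: ConditionalLogBinomial consumes a main feature and a condition feature and emits a per-pixel softmax over n_classes ordered bins. Channel counts and spatial sizes are arbitrary, and the import assumes the zoedepth package is importable.

# Hedged sketch, not part of the repository.
import torch
from zoedepth.models.layers.dist_layers import ConditionalLogBinomial

head = ConditionalLogBinomial(in_features=32, condition_dim=16, n_classes=64)
x = torch.randn(2, 32, 24, 32)     # main feature, N C H W
cond = torch.randn(2, 16, 24, 32)  # condition feature

probs = head(x, cond)              # (2, 64, 24, 32)
print(probs.shape, float(probs.sum(dim=1).mean()))  # softmax over bins, sums to ~1 per pixel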
metric_depth/zoedepth/models/layers/localbins_layers.py
ADDED
@@ -0,0 +1,169 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
import torch.nn as nn


class SeedBinRegressor(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin center regressor network. Bin centers are bounded on (min_depth, max_depth) interval.

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Min depth value. Defaults to 1e-3.
            max_depth (float, optional): Max depth value. Defaults to 10.
        """
        super().__init__()
        self.version = "1_1"
        self.min_depth = min_depth
        self.max_depth = max_depth

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """
        Returns tensor of bin_width vectors (centers). One vector b for every pixel
        """
        B = self._net(x)
        eps = 1e-3
        B = B + eps
        B_widths_normed = B / B.sum(dim=1, keepdim=True)
        B_widths = (self.max_depth - self.min_depth) * \
            B_widths_normed  # .shape NCHW
        # pad has the form (left, right, top, bottom, front, back)
        B_widths = nn.functional.pad(
            B_widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
        B_edges = torch.cumsum(B_widths, dim=1)  # .shape NCHW

        B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:, 1:, ...])
        return B_widths_normed, B_centers


class SeedBinRegressorUnnormed(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin center regressor network. Bin centers are unbounded

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
            max_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
        """
        super().__init__()
        self.version = "1_1"
        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.Softplus()
        )

    def forward(self, x):
        """
        Returns tensor of bin_width vectors (centers). One vector b for every pixel
        """
        B_centers = self._net(x)
        return B_centers, B_centers


class Projector(nn.Module):
    def __init__(self, in_features, out_features, mlp_dim=128):
        """Projector MLP

        Args:
            in_features (int): input channels
            out_features (int): output channels
            mlp_dim (int, optional): hidden dimension. Defaults to 128.
        """
        super().__init__()

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, out_features, 1, 1, 0),
        )

    def forward(self, x):
        return self._net(x)


class LinearSplitter(nn.Module):
    def __init__(self, in_features, prev_nbins, split_factor=2, mlp_dim=128, min_depth=1e-3, max_depth=10):
        super().__init__()

        self.prev_nbins = prev_nbins
        self.split_factor = split_factor
        self.min_depth = min_depth
        self.max_depth = max_depth

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.GELU(),
            nn.Conv2d(mlp_dim, prev_nbins * split_factor, 1, 1, 0),
            nn.ReLU()
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        x : feature block; shape - n, c, h, w
        b_prev : previous bin widths normed; shape - n, prev_nbins, h, w
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding
        S = self._net(x)
        eps = 1e-3
        S = S + eps
        n, c, h, w = S.shape
        S = S.view(n, self.prev_nbins, self.split_factor, h, w)
        S_normed = S / S.sum(dim=2, keepdim=True)  # fractional splits

        b_prev = nn.functional.interpolate(b_prev, (h, w), mode='bilinear', align_corners=True)

        b_prev = b_prev / b_prev.sum(dim=1, keepdim=True)  # renormalize for guarantees
        # print(b_prev.shape, S_normed.shape)
        # if is_for_query:(1).expand(-1, b_prev.size(0)//n, -1, -1, -1, -1).flatten(0,1) # TODO ? can replace all this with a single torch.repeat?
        b = b_prev.unsqueeze(2) * S_normed
        b = b.flatten(1, 2)  # .shape n, prev_nbins * split_factor, h, w

        # calculate bin centers for loss calculation
        B_widths = (self.max_depth - self.min_depth) * b  # .shape N, nprev * splitfactor, H, W
        # pad has the form (left, right, top, bottom, front, back)
        B_widths = nn.functional.pad(B_widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
        B_edges = torch.cumsum(B_widths, dim=1)  # .shape NCHW

        B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:, 1:, ...])
        return b, B_centers
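As a rough illustration (not part of the upload), the seed regressor and projector above can be combined like this; all channel counts are invented and the import assumes the zoedepth package resolves from metric_depth/.

# Hedged sketch, not part of the repository.
import torch
from zoedepth.models.layers.localbins_layers import SeedBinRegressor, Projector

seed = SeedBinRegressor(in_features=256, n_bins=16, min_depth=1e-3, max_depth=10)
proj = Projector(in_features=256, out_features=128)

feat = torch.randn(2, 256, 12, 16)
b_widths_normed, b_centers = seed(feat)   # both (2, 16, 12, 16)
b_embedding = proj(feat)                  # (2, 128, 12, 16)

print(float(b_widths_normed.sum(dim=1).mean()))        # ~1.0: widths partition the depth range
print(float(b_centers.min()), float(b_centers.max()))  # centers lie inside (min_depth, max_depth)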
metric_depth/zoedepth/models/layers/patch_transformer.py
ADDED
@@ -0,0 +1,91 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch
import torch.nn as nn


class PatchTransformerEncoder(nn.Module):
    def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False):
        """ViT-like transformer block

        Args:
            in_channels (int): Input channels
            patch_size (int, optional): patch size. Defaults to 10.
            embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128.
            num_heads (int, optional): number of attention heads. Defaults to 4.
            use_class_token (bool, optional): Whether to use an extra token at the start for global accumulation (called a "class token"). Defaults to False.
        """
        super(PatchTransformerEncoder, self).__init__()
        self.use_class_token = use_class_token
        encoder_layers = nn.TransformerEncoderLayer(
            embedding_dim, num_heads, dim_feedforward=1024)
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layers, num_layers=4)  # takes shape S,N,E

        self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim,
                                           kernel_size=patch_size, stride=patch_size, padding=0)

    def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'):
        """Generate positional encodings

        Args:
            sequence_length (int): Sequence length
            batch_size (int): Batch size
            embedding_dim (int): Embedding dimension
            device (str, optional): Device on which to create the encodings. Defaults to 'cpu'.

        Returns:
            torch.Tensor SBE: Positional encodings
        """
        position = torch.arange(
            0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1)
        index = torch.arange(
            0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0)
        div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim))
        pos_encoding = position * div_term
        pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1)
        pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1)
        return pos_encoding

    def forward(self, x):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Input feature tensor

        Returns:
            torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim
        """
        embeddings = self.embedding_convPxP(x).flatten(
            2)  # .shape = n,c,s = n, embedding_dim, s
        if self.use_class_token:
            # extra special token at start ?
            embeddings = nn.functional.pad(embeddings, (1, 0))

        # change to S,N,E format required by transformer
        embeddings = embeddings.permute(2, 0, 1)
        S, N, E = embeddings.shape
        embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device)
        x = self.transformer_encoder(embeddings)  # .shape = S, N, E
        return x
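For orientation (this snippet is not part of the upload), PatchTransformerEncoder patchifies an NCHW feature map with a strided convolution and returns S,N,E transformer embeddings; the sizes below are arbitrary and the import assumes the zoedepth package is on the path.

# Hedged sketch, not part of the repository.
import torch
from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder

enc = PatchTransformerEncoder(in_channels=256, patch_size=10,
                              embedding_dim=128, num_heads=4)
feat = torch.randn(2, 256, 30, 40)   # N C H W

out = enc(feat)                      # S = (30 // 10) * (40 // 10) = 12 patches
print(out.shape)                     # torch.Size([12, 2, 128]) -> S, N, E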
metric_depth/zoedepth/models/model_io.py
ADDED
@@ -0,0 +1,92 @@
# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat

import torch


def load_state_dict(model, state_dict):
    """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict.

    DataParallel prefixes state_dict keys with 'module.' when saving.
    If the model is not a DataParallel model but the state_dict is, then prefixes are removed.
    If the model is a DataParallel model but the state_dict is not, then prefixes are added.
    """
    state_dict = state_dict.get('model', state_dict)
    # if model is a DataParallel model, then state_dict keys are prefixed with 'module.'

    do_prefix = isinstance(
        model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel))
    state = {}
    for k, v in state_dict.items():
        if k.startswith('module.') and not do_prefix:
            k = k[7:]

        if not k.startswith('module.') and do_prefix:
            k = 'module.' + k

        state[k] = v

    model.load_state_dict(state)
    print("Loaded successfully")
    return model


def load_wts(model, checkpoint_path):
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    return load_state_dict(model, ckpt)


def load_state_dict_from_url(model, url, **kwargs):
    state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
    return load_state_dict(model, state_dict)


def load_state_from_resource(model, resource: str):
    """Loads weights to the model from a given resource. A resource can be of following types:
        1. URL. Prefixed with "url::"
            e.g. url::http(s)://url.resource.com/ckpt.pt

        2. Local path. Prefixed with "local::"
            e.g. local::/path/to/ckpt.pt

    Args:
        model (torch.nn.Module): Model
        resource (str): resource string

    Returns:
        torch.nn.Module: Model with loaded weights
    """
    print(f"Using pretrained resource {resource}")

    if resource.startswith('url::'):
        url = resource.split('url::')[1]
        return load_state_dict_from_url(model, url, progress=True)

    elif resource.startswith('local::'):
        path = resource.split('local::')[1]
        return load_wts(model, path)

    else:
        raise ValueError("Invalid resource type, only url:: and local:: are supported")
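A hedged usage note (not part of the upload): load_state_from_resource dispatches on the "url::" / "local::" prefix described in its docstring. The path and URL below are placeholders only, so the actual calls are left commented out.

# Hedged sketch, not part of the repository.
import torch.nn as nn
from zoedepth.models.model_io import load_state_from_resource

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU())

# Local checkpoint (placeholder path, would need to exist on disk):
# model = load_state_from_resource(model, "local::/path/to/ckpt.pt")

# Remote checkpoint (placeholder URL, fetched via torch.hub):
# model = load_state_from_resource(model, "url::https://example.com/ckpt.pt")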