MackinationsAi committed on
Commit
4311f9e
1 Parent(s): 7265117

Upload 4 files

Browse files
README.md ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+
4
+ language:
5
+ - en
6
+ pipeline_tag: depth-estimation
7
+ tags:
8
+ - depth
9
+ - relative depth
10
+ ---
11
+
12
+ # Depth-Anything-V2-Base
13
+
14
+ ## Introduction
15
+ Depth Anything V2 is trained from 595K synthetic labeled images & 62M+ real unlabeled images, providing the most capable monocular depth estimation (MDE) model with the following features:
16
+ - more fine-grained details than Depth Anything V1
17
+ - more robust than Depth Anything V1 & SD-based models (e.g., Marigold, Geowizard)
18
+ - more efficient (10x faster) & more lightweight than SD-based models
19
+ - impressive fine-tuned performance with our pre-trained models
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ git clone https://github.com/MackinationsAi/Upgraded-Depth-Anything-V2.git
25
+ cd Upgraded-Depth-Anything-V2
26
+ one_click_install.bat
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ Please refer to the [README.md](https://github.com/MackinationsAi/Upgraded-Depth-Anything-V2/blob/main/README.md) for actual usage.
32
+
33
+ ## Test Code
34
+
35
+ ```bash
36
+ cd Upgraded-Depth-Anything-V2
37
+ venv\scripts\activate
38
+ python test.py /path/to/your/image.jpg
39
+ ```
40
+ Create a test.py script using the code below (the input image may be a .jpg or .png file):
41
+
42
+ ```python
43
+ import cv2
44
+ import torch
45
+ import numpy as np
46
+ import os
47
+ import argparse
48
+
49
+ from safetensors.torch import load_file
50
+ from depth_anything_v2.dpt import DepthAnythingV2
51
+
52
+ # Argument parser for input image path
53
+ parser = argparse.ArgumentParser(description="Depth map inference using DepthAnythingV2 model.")
54
+ parser.add_argument("input_image_path", type=str, help="Path to the input image")
55
+ args = parser.parse_args()
56
+
57
+ # Determine the directory of this script
58
+ script_dir = os.path.dirname(os.path.abspath(__file__))
59
+
60
+ # Set output path relative to the script directory
61
+ output_image_path = os.path.join(script_dir, "base_udav2_hf-code-test.png")
62
+ checkpoint_path = os.path.join(script_dir, "checkpoints", "depth_anything_v2_vitb.safetensors")
63
+
64
+ # Device selection: CUDA, MPS, or CPU
65
+ if torch.cuda.is_available():
66
+ device = torch.device('cuda')
67
+ elif torch.backends.mps.is_available():
68
+ device = torch.device('mps')
69
+ else:
70
+ device = torch.device('cpu')
71
+
72
+ model = DepthAnythingV2(encoder='vitb', features=128, out_channels=[96, 192, 384, 768])
73
+
74
+ state_dict = load_file(checkpoint_path, device='cpu')
75
+
76
+ model.load_state_dict(state_dict)
77
+ model.to(device)
78
+ model.eval()
79
+
80
+ # Load the input image
81
+ raw_img = cv2.imread(args.input_image_path)
82
+
83
+ # Infer the depth map
84
+ depth = model.infer_image(raw_img) # HxW raw depth map
85
+
86
+ # Normalize the depth map to 0-255 for saving as an image
87
+ depth_normalized = cv2.normalize(depth, None, 0, 255, cv2.NORM_MINMAX)
88
+ depth_normalized = depth_normalized.astype(np.uint8)
89
+
90
+ cv2.imwrite(output_image_path, depth_normalized)
91
+ print(f"Depth map saved at {output_image_path}")
92
+ ```
93
+
94
+ ## Citation
95
+
96
+ If you find this project useful, please consider citing [MackinationsAi](https://github.com/MackinationsAi/) & the following:
97
+
98
+ ```bibtex
99
+ @article{depth_anything_v2,
100
+ title={Depth Anything V2},
101
+ author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
102
+ journal={arXiv:2406.09414},
103
+ year={2024}
104
+ }
105
+
106
+ @inproceedings{depth_anything_v1,
107
+ title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data},
108
+ author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
109
+ booktitle={CVPR},
110
+ year={2024}
111
+ }
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "architectures": [
4
+ "DepthAnythingV2ForDepthEstimation"
5
+ ],
6
+ "backbone": null,
7
+ "backbone_config": {
8
+ "architectures": [
9
+ "Dinov2Model"
10
+ ],
11
+ "hidden_size": 768,
12
+ "image_size": 518,
13
+ "model_type": "dinov2",
14
+ "num_attention_heads": 12,
15
+ "out_features": [
16
+ "stage9",
17
+ "stage10",
18
+ "stage11",
19
+ "stage12"
20
+ ],
21
+ "out_indices": [
22
+ 9,
23
+ 10,
24
+ 11,
25
+ 12
26
+ ],
27
+ "patch_size": 14,
28
+ "reshape_hidden_states": false,
29
+ "torch_dtype": "float32"
30
+ },
31
+ "fusion_hidden_size": 128,
32
+ "head_hidden_size": 32,
33
+ "head_in_index": -1,
34
+ "initializer_range": 0.02,
35
+ "model_type": "depth_anything_v2",
36
+ "neck_hidden_sizes": [
37
+ 96,
38
+ 192,
39
+ 384,
40
+ 768
41
+ ],
42
+ "patch_size": 14,
43
+ "reassemble_factors": [
44
+ 4,
45
+ 2,
46
+ 1,
47
+ 0.5
48
+ ],
49
+ "reassemble_hidden_size": 768,
50
+ "torch_dtype": "float32",
51
+ "transformers_version": null,
52
+ "use_pretrained_backbone": false
53
+ }
depth_anything_v2_vitb.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386758cbd2a2cac62ca62286d3ba810734561b3097d86a585dd3dac357153941
3
+ size 194967018
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_pad": false,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "ensure_multiple_of": 14,
7
+ "image_mean": [
8
+ 0.485,
9
+ 0.456,
10
+ 0.406
11
+ ],
12
+ "image_processor_type": "DPTImageProcessor",
13
+ "image_std": [
14
+ 0.229,
15
+ 0.224,
16
+ 0.225
17
+ ],
18
+ "keep_aspect_ratio": true,
19
+ "resample": 3,
20
+ "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "height": 518,
23
+ "width": 518
24
+ },
25
+ "size_divisor": null
26
+ }