sudo-soldier committed on
Commit
b9c32e6
·
verified ·
1 Parent(s): 9b20bc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -75
app.py CHANGED
@@ -5,12 +5,10 @@ import numpy as np
5
  from PIL import Image
6
  import open3d as o3d
7
  from pathlib import Path
8
- import os
9
 
10
  feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
11
  model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
12
 
13
-
14
  def process_image(image_path):
15
  image_path = Path(image_path)
16
  image_raw = Image.open(image_path)
@@ -18,15 +16,12 @@ def process_image(image_path):
18
  (800, int(800 * image_raw.size[1] / image_raw.size[0])),
19
  Image.Resampling.LANCZOS)
20
 
21
- # prepare image for the model
22
  encoding = feature_extractor(image, return_tensors="pt")
23
-
24
- # forward pass
25
  with torch.no_grad():
26
  outputs = model(**encoding)
27
  predicted_depth = outputs.predicted_depth
28
 
29
- # interpolate to original size
30
  prediction = torch.nn.functional.interpolate(
31
  predicted_depth.unsqueeze(1),
32
  size=image.size[::-1],
@@ -35,97 +30,57 @@ def process_image(image_path):
35
  ).squeeze()
36
  output = prediction.cpu().numpy()
37
  depth_image = (output * 255 / np.max(output)).astype('uint8')
 
38
  try:
39
  gltf_path = create_3d_obj(np.array(image), depth_image, image_path)
40
- img = Image.fromarray(depth_image)
41
- return [img, gltf_path, gltf_path]
42
- except Exception as e:
43
- gltf_path = create_3d_obj(
44
- np.array(image), depth_image, image_path, depth=8)
45
- img = Image.fromarray(depth_image)
46
- return [img, gltf_path, gltf_path]
47
- except:
48
- print("Error reconstructing 3D model")
49
- raise Exception("Error reconstructing 3D model")
50
-
51
 
52
  def create_3d_obj(rgb_image, depth_image, image_path, depth=10):
53
  depth_o3d = o3d.geometry.Image(depth_image)
54
  image_o3d = o3d.geometry.Image(rgb_image)
55
  rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
56
  image_o3d, depth_o3d, convert_rgb_to_intensity=False)
57
- w = int(depth_image.shape[1])
58
- h = int(depth_image.shape[0])
59
 
60
  camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
61
  camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)
62
 
63
- pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
64
- rgbd_image, camera_intrinsic)
65
-
66
- print('normals')
67
- pcd.normals = o3d.utility.Vector3dVector(
68
- np.zeros((1, 3))) # invalidate existing normals
69
  pcd.estimate_normals(
70
  search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
71
- pcd.orient_normals_towards_camera_location(
72
- camera_location=np.array([0., 0., 1000.]))
73
- pcd.transform([[1, 0, 0, 0],
74
- [0, -1, 0, 0],
75
- [0, 0, -1, 0],
76
- [0, 0, 0, 1]])
77
- pcd.transform([[-1, 0, 0, 0],
78
- [0, 1, 0, 0],
79
- [0, 0, 1, 0],
80
- [0, 0, 0, 1]])
81
-
82
- print('run Poisson surface reconstruction')
83
- with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
84
- mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
85
  pcd, depth=depth, width=0, scale=1.1, linear_fit=True)
86
 
87
  voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
88
- print(f'voxel_size = {voxel_size:e}')
89
- mesh = mesh_raw.simplify_vertex_clustering(
90
- voxel_size=voxel_size,
91
- contraction=o3d.geometry.SimplificationContraction.Average)
92
-
93
- # vertices_to_remove = densities < np.quantile(densities, 0.001)
94
- # mesh.remove_vertices_by_mask(vertices_to_remove)
95
  bbox = pcd.get_axis_aligned_bounding_box()
96
  mesh_crop = mesh.crop(bbox)
97
  gltf_path = f'./{image_path.stem}.gltf'
98
- o3d.io.write_triangle_mesh(
99
- gltf_path, mesh_crop, write_triangle_uvs=True)
100
  return gltf_path
101
 
102
-
103
- title = "zero-shot depth estimation with DPT + 3D Point Cloud"
104
- description = "DPT model to predict the depth of an image and then 3D Point Cloud to create a 3D object."
105
-
106
- # Add both image and model examples
107
- examples = [
108
- ["examples/" + img] for img in os.listdir("files/")
109
- ] + [
110
- [os.path.join(os.path.dirname(__file__), "files/model1.glb")],
111
- [os.path.join(os.path.dirname(__file__), "files/model2.glb")],
112
- [os.path.join(os.path.dirname(__file__), "files/model3.glb")],
113
- [os.path.join(os.path.dirname(__file__), "files/model4.glb")],
114
- ["https://huggingface.co/datasets/dylanebert/3dgs/resolve/main/bonsai/bonsai-7k-mini.splat"],
115
- ]
116
-
117
- iface = gr.Interface(fn=process_image,
118
- inputs=[gr.Image(
119
- type="filepath", label="Input Image")],
120
- outputs=[gr.Image(label="predicted depth", type="pil"),
121
- gr.Model3D(label="3d mesh reconstruction", clear_color=[
122
- 1.0, 1.0, 1.0, 1.0]),
123
- gr.File(label="3d gLTF")],
124
- title=title,
125
- description=description,
126
- examples=examples,
127
- allow_flagging="never",
128
- cache_examples=False)
129
 
130
  if __name__ == "__main__":
131
  iface.launch()
@@ -134,3 +89,4 @@ if __name__ == "__main__":
134
 
135
 
136
 
 
 
5
  from PIL import Image
6
  import open3d as o3d
7
  from pathlib import Path
 
8
 
9
  feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
10
  model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
11
 
 
12
  def process_image(image_path):
13
  image_path = Path(image_path)
14
  image_raw = Image.open(image_path)
 
16
  (800, int(800 * image_raw.size[1] / image_raw.size[0])),
17
  Image.Resampling.LANCZOS)
18
 
 
19
  encoding = feature_extractor(image, return_tensors="pt")
20
+
 
21
  with torch.no_grad():
22
  outputs = model(**encoding)
23
  predicted_depth = outputs.predicted_depth
24
 
 
25
  prediction = torch.nn.functional.interpolate(
26
  predicted_depth.unsqueeze(1),
27
  size=image.size[::-1],
 
30
  ).squeeze()
31
  output = prediction.cpu().numpy()
32
  depth_image = (output * 255 / np.max(output)).astype('uint8')
33
+
34
  try:
35
  gltf_path = create_3d_obj(np.array(image), depth_image, image_path)
36
+ except Exception:
37
+ gltf_path = create_3d_obj(np.array(image), depth_image, image_path, depth=8)
38
+
39
+ return [Image.fromarray(depth_image), gltf_path, gltf_path]
 
 
 
 
 
 
 
40
 
41
  def create_3d_obj(rgb_image, depth_image, image_path, depth=10):
42
  depth_o3d = o3d.geometry.Image(depth_image)
43
  image_o3d = o3d.geometry.Image(rgb_image)
44
  rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
45
  image_o3d, depth_o3d, convert_rgb_to_intensity=False)
46
+ w, h = depth_image.shape[1], depth_image.shape[0]
 
47
 
48
  camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
49
  camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)
50
 
51
+ pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
 
 
 
 
 
52
  pcd.estimate_normals(
53
  search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
54
+ pcd.orient_normals_towards_camera_location(camera_location=np.array([0., 0., 1000.]))
55
+
56
+ with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug):
57
+ mesh_raw, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
 
 
 
 
 
 
 
 
 
 
58
  pcd, depth=depth, width=0, scale=1.1, linear_fit=True)
59
 
60
  voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
61
+ mesh = mesh_raw.simplify_vertex_clustering(voxel_size=voxel_size)
62
+
 
 
 
 
 
63
  bbox = pcd.get_axis_aligned_bounding_box()
64
  mesh_crop = mesh.crop(bbox)
65
  gltf_path = f'./{image_path.stem}.gltf'
66
+ o3d.io.write_triangle_mesh(gltf_path, mesh_crop, write_triangle_uvs=True)
 
67
  return gltf_path
68
 
69
+ title = "Zero-shot Depth Estimation with DPT + 3D Point Cloud"
70
+ description = "DPT model predicts depth from an image, followed by 3D Point Cloud reconstruction."
71
+
72
+ iface = gr.Interface(
73
+ fn=process_image,
74
+ inputs=[gr.Image(type="filepath", label="Input Image")],
75
+ outputs=[
76
+ gr.Image(label="Predicted Depth", type="pil"),
77
+ gr.Model3D(label="3D Mesh Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]),
78
+ gr.File(label="3D gLTF")
79
+ ],
80
+ title=title,
81
+ description=description,
82
+ allow_flagging="never"
83
+ )
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  if __name__ == "__main__":
86
  iface.launch()
 
89
 
90
 
91
 
92
+