Spaces:
Sleeping
Sleeping
File size: 3,258 Bytes
3e9df0c 6ec98d8 3e9df0c 6ec98d8 c55986d 6ec98d8 c55986d 6ec98d8 b9c32e6 6ec98d8 b9c32e6 6ec98d8 b9c32e6 6ec98d8 b9c32e6 6ec98d8 b9c32e6 6ec98d8 b9c32e6 6ec98d8 b9c32e6 6ec98d8 b9c32e6 6ec98d8 b9c32e6 fe3c2bc 858ea72 6ec98d8 f52a14b 5847e9e c62d4d9 b9c32e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
import open3d as o3d
from pathlib import Path
# The preprocessing pipeline and the network weights come from the same
# Hugging Face checkpoint. Both are loaded once at import time and reused by
# every call to process_image.
_DPT_CHECKPOINT = "Intel/dpt-large"
feature_extractor = DPTFeatureExtractor.from_pretrained(_DPT_CHECKPOINT)
model = DPTForDepthEstimation.from_pretrained(_DPT_CHECKPOINT)
def _depth_to_uint8(depth_map):
    """Scale a floating-point depth map into a ``uint8`` array in 0..255.

    Negative values are clamped to zero before scaling, and a map whose
    maximum is not strictly positive (all zeros, or NaN) yields an all-zero
    image instead of dividing by zero.
    """
    # Bicubic upsampling can overshoot slightly below zero; clamp first so the
    # uint8 cast below cannot wrap around.
    depth_map = np.clip(depth_map, 0, None)
    peak = depth_map.max()
    # "not peak > 0" is also true for NaN, which a plain "peak <= 0" would miss.
    if not peak > 0:
        return np.zeros(depth_map.shape, dtype='uint8')
    return (depth_map * 255 / peak).astype('uint8')


def process_image(image_path):
    """Predict a depth map for an image and build a 3D mesh from it.

    The image is loaded, converted to RGB, resized to a width of 800 pixels
    (keeping the aspect ratio) and run through the module-level
    ``feature_extractor`` and ``model``. The predicted depth is resized back
    to the image resolution, normalised to 8 bits and handed to
    ``create_3d_obj`` together with the RGB pixels.

    Args:
        image_path: Path (``str`` or ``Path``) of the input image file.

    Returns:
        A three-element list: the depth map as a PIL image, followed by the
        path of the written glTF file twice (once per output component).

    Raises:
        Exception: whatever ``create_3d_obj`` raises if the reduced-depth
            retry fails as well.
    """
    import logging  # local import: only needed for the fallback warning

    image_path = Path(image_path)

    # The context manager releases the file handle as soon as the pixels have
    # been copied into the resized image.
    with Image.open(image_path) as image_raw:
        width, height = image_raw.size
        # max(1, ...) keeps extremely wide images from collapsing to height 0.
        target_size = (800, max(1, int(800 * height / width)))
        # Force three channels: RGBA, palette or grayscale uploads would
        # otherwise give an array that is not height x width x 3, which the
        # RGBD/mesh step is not written for.
        image = image_raw.convert("RGB").resize(
            target_size, Image.Resampling.LANCZOS)

    encoding = feature_extractor(image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth
        # PIL reports size as (width, height); interpolate wants (height, width).
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    depth_image = _depth_to_uint8(prediction.cpu().numpy())
    rgb_array = np.array(image)

    try:
        gltf_path = create_3d_obj(rgb_array, depth_image, image_path)
    except Exception:
        # Deliberate best-effort: retry once with a shallower reconstruction
        # depth, but record why the first attempt failed instead of hiding it.
        logging.getLogger(__name__).warning(
            "Mesh reconstruction failed at default depth; retrying with depth=8",
            exc_info=True)
        gltf_path = create_3d_obj(rgb_array, depth_image, image_path, depth=8)

    return [Image.fromarray(depth_image), gltf_path, gltf_path]
def create_3d_obj(rgb_image, depth_image, image_path, depth=10):
    """Reconstruct a triangle mesh from colour + depth and save it as glTF.

    The colour and depth arrays are combined into an Open3D RGBD image,
    projected to a point cloud through a pinhole camera, meshed with Poisson
    surface reconstruction, simplified, cropped to the point cloud's bounding
    box and written to ``./<image stem>.gltf``.

    Args:
        rgb_image: Colour pixels as a NumPy array.
        depth_image: Depth map as a NumPy array; its first two dimensions
            (rows, columns) define the camera resolution.
        image_path: ``pathlib.Path`` of the source image; only its ``stem``
            is used, to name the output file.
        depth: Value forwarded as ``depth`` to Open3D's Poisson
            reconstruction.

    Returns:
        The relative path of the written ``.gltf`` file, as a string.

    Raises:
        RuntimeError: If Open3D reports that the mesh could not be written.
    """
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False)

    w, h = depth_image.shape[1], depth_image.shape[0]
    # No real calibration is available: use fixed values of 500 for both
    # focal lengths and put the principal point at the image centre.
    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
    camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)

    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

    # Normals are estimated and oriented before the Poisson step below.
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
    pcd.orient_normals_towards_camera_location(camera_location=np.array([0., 0., 1000.]))

    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug):
        mesh_raw, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=depth, width=0, scale=1.1, linear_fit=True)

    # Simplify with a voxel size of 1/256 of the mesh's longest side.
    voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
    mesh = mesh_raw.simplify_vertex_clustering(voxel_size=voxel_size)

    # Crop the simplified mesh to the bounding box of the input points.
    bbox = pcd.get_axis_aligned_bounding_box()
    mesh_crop = mesh.crop(bbox)

    gltf_path = f'./{image_path.stem}.gltf'
    # write_triangle_mesh signals failure through its boolean return value;
    # without this check a failed write would hand back a path to a missing
    # file, or to a stale file left by an earlier upload with the same name.
    if not o3d.io.write_triangle_mesh(gltf_path, mesh_crop, write_triangle_uvs=True):
        raise RuntimeError(f"Open3D failed to write mesh to {gltf_path}")
    return gltf_path
# Text passed to the Gradio interface as its title and description.
title = "Zero-shot Depth Estimation with DPT + 3D Point Cloud"
description = "DPT model predicts depth from an image, followed by 3D Point Cloud reconstruction."

# process_image takes one file path and returns three values, so the interface
# has one input component and three output components, in the same order as
# the list that process_image returns.
_input_components = [gr.Image(type="filepath", label="Input Image")]
_output_components = [
    gr.Image(label="Predicted Depth", type="pil"),
    gr.Model3D(label="3D Mesh Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]),
    gr.File(label="3D gLTF"),
]

iface = gr.Interface(
    fn=process_image,
    inputs=_input_components,
    outputs=_output_components,
    title=title,
    description=description,
    allow_flagging="never",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|