import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
import open3d as o3d
from pathlib import Path
import subprocess
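
# Note: the .gltf -> .glb conversion in create_3d_obj shells out to
# "npx gltf-pipeline", so a Node.js environment with npx must be available.
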
# Load model and feature extractor
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
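
# Pipeline: estimate depth with DPT, normalize it to an 8-bit depth map, then
# reconstruct and export a textured mesh with Open3D.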
def process_image(image_path):
    image_path = Path(image_path) if isinstance(image_path, str) else image_path
    try:
        image_raw = Image.open(image_path).convert("RGB")
    except Exception as e:
        # Surface the failure in the UI instead of returning a bare string,
        # which would not match the three Gradio outputs.
        raise gr.Error(f"Error loading image: {e}")

    # Resize to a fixed width of 800 px while maintaining the aspect ratio
    image = image_raw.resize(
        (800, int(800 * image_raw.size[1] / image_raw.size[0])),
        Image.Resampling.LANCZOS,
    )

    # Run depth estimation
    encoding = feature_extractor(image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Interpolate the prediction back to the image resolution and normalize to 8-bit
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    output = prediction.cpu().numpy()
    if np.max(output) > 0:
        depth_image = (output * 255 / np.max(output)).astype("uint8")
    else:
        depth_image = np.zeros_like(output, dtype="uint8")  # Handle an all-zero prediction

    glb_path = create_3d_obj(np.array(image), depth_image, image_path)
    if glb_path and Path(glb_path).exists():
        return Image.fromarray(depth_image), glb_path, glb_path
    # Still show the depth map even if 3D reconstruction failed.
    return Image.fromarray(depth_image), None, None

def create_3d_obj(rgb_image, depth_image, image_path):
    try:
        depth_o3d = o3d.geometry.Image(depth_image)
        image_o3d = o3d.geometry.Image(rgb_image)
        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
            image_o3d, depth_o3d, convert_rgb_to_intensity=False)

        # Pinhole camera with an assumed 500 px focal length, centered on the image
        w, h = depth_image.shape[1], depth_image.shape[0]
        camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
        camera_intrinsic.set_intrinsics(w, h, 500, 500, w / 2, h / 2)

        # Back-project the RGB-D image into a point cloud and estimate normals
        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
        pcd.estimate_normals(
            search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
        pcd.orient_normals_towards_camera_location(camera_location=np.array([0., 0., 1000.]))

        # Poisson surface reconstruction of the oriented point cloud
        mesh_raw, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=10, width=0, scale=1.1, linear_fit=True)
        if not mesh_raw.has_triangles():
            print("Mesh generation failed: no triangles in mesh")
            return None

        # Center the mesh for a better preview
        bbox = pcd.get_axis_aligned_bounding_box()
        mesh_raw.translate(-bbox.get_center())

        # Save the 3D model as .gltf
        gltf_path = str(Path.cwd() / f"{image_path.stem}.gltf")
        o3d.io.write_triangle_mesh(gltf_path, mesh_raw, write_triangle_uvs=True)

        # Convert .gltf to .glb with gltf-pipeline (run via npx)
        glb_path = gltf_path.replace(".gltf", ".glb")
        subprocess.run(["npx", "gltf-pipeline", "-i", gltf_path, "-o", glb_path])
        if Path(glb_path).exists():
            return glb_path
        print("GLB conversion failed.")
        return None
    except Exception as e:
        print(f"3D model generation failed: {e}")
        return None

title = "Zero-shot Depth Estimation with DPT + 3D Model Preview"
description = "Upload an image to generate a depth map and reconstruct a 3D model in .glb format."
with gr.Blocks() as demo:
gr.Markdown(f"## {title}")
gr.Markdown(description)
with gr.Row():
with gr.Column(scale=1):
image_input = gr.Image(type="filepath", label="Upload Image")
generate_button = gr.Button("Generate 3D Model")
with gr.Column(scale=2):
depth_output = gr.Image(label="Predicted Depth", type="pil")
model_output = gr.Model3D(label="3D Model Preview (GLB)")
file_output = gr.File(label="Download 3D GLB File")
generate_button.click(fn=process_image, inputs=[image_input], outputs=[depth_output, model_output, file_output])
if __name__ == "__main__":
    demo.launch()