import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
import open3d as o3d
from pathlib import Path
import subprocess

# Load model and feature extractor
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

def process_image(image_path):
    image_path = Path(image_path) if isinstance(image_path, str) else image_path
    try:
        image_raw = Image.open(image_path).convert("RGB")
    except Exception as e:
        # Surface load failures in the UI; this handler feeds three outputs,
        # so returning a bare string here would not map onto them.
        raise gr.Error(f"Error loading image: {e}")

    # Resize while maintaining aspect ratio
    image = image_raw.resize(
        (800, int(800 * image_raw.size[1] / image_raw.size[0])),
        Image.Resampling.LANCZOS
    )

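    # Prepare model inputs; the feature extractor handles resizing and normalization for DPT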
    encoding = feature_extractor(image, return_tensors="pt")
    
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Upsample the predicted depth to match the resized input image
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    output = prediction.cpu().numpy()
    
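    # Scale the depth map to 8-bit for display and for Open3D's depth image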
    if np.max(output) > 0:
        depth_image = (output * 255 / np.max(output)).astype('uint8')
    else:
        depth_image = np.zeros_like(output, dtype='uint8')  # Guard against an all-zero depth map
    
    glb_path = create_3d_obj(np.array(image), depth_image, image_path)
    
    if glb_path and Path(glb_path).exists():
        return Image.fromarray(depth_image), glb_path, glb_path
    else:
        # Still return the depth map; leave the 3D viewer and download slots empty
        # rather than passing an error string where gr.File expects a path.
        print("3D model generation failed.")
        return Image.fromarray(depth_image), None, None

def create_3d_obj(rgb_image, depth_image, image_path):
    try:
        depth_o3d = o3d.geometry.Image(depth_image)
        image_o3d = o3d.geometry.Image(rgb_image)
        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
            image_o3d, depth_o3d, convert_rgb_to_intensity=False)

        w, h = depth_image.shape[1], depth_image.shape[0]
        camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
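        # Heuristic pinhole intrinsics: 500 px focal length, principal point at the image center (no real calibration)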
        camera_intrinsic.set_intrinsics(w, h, 500, 500, w / 2, h / 2)

        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
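        # Poisson reconstruction needs consistently oriented normals; estimate them and point them toward a virtual camera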
        pcd.estimate_normals(
            search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
        pcd.orient_normals_towards_camera_location(camera_location=np.array([0., 0., 1000.]))

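        # Reconstruct a surface from the oriented point cloud via Poisson reconstruction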
        mesh_raw, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=10, width=0, scale=1.1, linear_fit=True)

        if not mesh_raw.has_triangles():
            print("Mesh generation failed: No triangles in mesh")
            return None  # Mesh generation failed
        
        # Center the mesh for better preview
        bbox = pcd.get_axis_aligned_bounding_box()
        mesh_raw.translate(-bbox.get_center())

        # Save the 3D model as .gltf
        gltf_path = str(Path.cwd() / f"{image_path.stem}.gltf")
        o3d.io.write_triangle_mesh(gltf_path, mesh_raw, write_triangle_uvs=True)

        # Convert .gltf to .glb
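        # gltf-pipeline is invoked via npx, so Node.js must be available on the host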
        glb_path = gltf_path.replace(".gltf", ".glb")
        subprocess.run(["npx", "gltf-pipeline", "-i", gltf_path, "-o", glb_path])

        if Path(glb_path).exists():
            return glb_path
        else:
            print("GLB conversion failed.")
            return None

    except Exception as e:
        print(f"3D model generation failed: {e}")
        return None

title = "Zero-shot Depth Estimation with DPT + 3D Model Preview"
description = "Upload an image to generate a depth map and reconstruct a 3D model in .glb format."

with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="filepath", label="Upload Image")
            generate_button = gr.Button("Generate 3D Model")
        
        with gr.Column(scale=2):
            depth_output = gr.Image(label="Predicted Depth", type="pil")
            model_output = gr.Model3D(label="3D Model Preview (GLB)")
            file_output = gr.File(label="Download 3D GLB File")

    generate_button.click(fn=process_image, inputs=[image_input], outputs=[depth_output, model_output, file_output])

if __name__ == "__main__":
    demo.launch()