Update reconstruction (beta version)
app.py CHANGED
@@ -54,7 +54,10 @@ device = "cuda"
 model_large.to(device)
 model_small.to(device)

-def depth_normal(img, model_selection="vit-small"):
+
+outputs_dir = "./outs"
+
+def depth_normal(img_path, model_selection="vit-small"):
     if model_selection == "vit-small":
         model = model_small
         cfg = cfg_small
@@ -65,7 +68,10 @@ def depth_normal(img, model_selection="vit-small"):
     else:
         raise NotImplementedError

+    img = Image.open(img_path)
+
     cv_image = np.array(img)
+    img = cv_image
     img = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
     intrinsic = [1000.0, 1000.0, img.shape[1]/2, img.shape[0]/2]
     rgb_input, cam_models_stacks, pad, label_scale_factor = transform_test_data_scalecano(img, intrinsic, cfg.data_basic)
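Note on the unchanged intrinsic line kept above: when no camera metadata is available, the demo falls back to a canonical pinhole camera with a 1000 px focal length and the principal point at the image centre. A minimal sketch of that fallback, assuming the list order is [fx, fy, cx, cy] as the surrounding calls suggest (the helper name is hypothetical, not part of app.py):

def canonical_intrinsic(height, width, focal_px=1000.0):
    # [fx, fy, cx, cy] in pixels: fixed focal length, principal point at the image centre,
    # matching intrinsic = [1000.0, 1000.0, img.shape[1]/2, img.shape[0]/2] in the hunk above.
    return [focal_px, focal_px, width / 2.0, height / 2.0]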
@@ -89,37 +95,125 @@ def depth_normal(img, model_selection="vit-small"):
     pred_depth = pred_depth.squeeze().cpu().numpy()
     pred_depth[pred_depth<0] = 0
     pred_color = gray_to_colormap(pred_depth)
+
+    ##formatted = (output * 255 / np.max(output)).astype('uint8')
+
+    path_output_dir = os.path.splitext(os.path.basename(img_path))[0] + datetime.now().strftime('%Y%m%d-%H%M%S')
+    path_output_dir = os.path.join(outputs_dir, path_output_dir)
+    os.makedirs(path_output_dir, exist_ok=True)
+
+    name_base = os.path.splitext(os.path.basename(img_path))[0]
+
+    depth_np = pred_depth
+    normal_np = torch.nn.functional.interpolate(pred_normal, [img.shape[0], img.shape[1]], mode='bilinear').squeeze().cpu().numpy()
+    normal_np = normal_np.transpose(1,2,0)

     pred_normal = pred_normal.squeeze()
     if pred_normal.size(0) == 3:
         pred_normal = pred_normal.permute(1,2,0)
     pred_color_normal = vis_surface_normal(pred_normal)
+
+    depth_path = os.path.join(path_output_dir, f"{name_base}_depth.npy")
+    normal_path = os.path.join(path_output_dir, f"{name_base}_normal.npy")
+
+    np.save(normal_path, normal_np)
+    np.save(depth_path, depth_np)

-
+    ori_w = img.shape[1]
+    ori_h = img.shape[0]
+
     img = Image.fromarray(pred_color)
+    #img = img.resize((int(300 * ori_w/ ori_h), 300))
+
     img_normal = Image.fromarray(pred_color_normal)
-
+    #img_normal = img_normal.resize((int(300 * ori_w/ ori_h), 300))
+
+    return img, img_normal, [depth_path, normal_path]

-
-
-
+def reconstruction(img_path, files, focal_length, reconstructed_file=None):
+    img = Image.open(img_path)
+    cv_image = np.array(img)
+    img = cv_image
+
+    depth_np = np.load(files[0])
+    pcd = reconstruct_pcd(depth_np * focal_length / 1000, focal_length, focal_length, img.shape[1]/2, img.shape[0]/2)
+    pcd_path = files[0].replace('_depth.npy', '.ply')
+    save_point_cloud(pcd.reshape((-1, 3)), img.reshape(-1, 3), pcd_path)
+    return [pcd_path]

 title = "Metric3D"
-description = "Gradio demo for Metric3D
+description = "Gradio demo for Metric3D which takes in a single image for computing metric depth and surface normal. To use it, simply upload your image, or click one of the examples to load them. Learn more from our paper linked below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/pdf/2307.10984.pdf'>Metric3D: Towards Zero-shot Metric 3D Prediction from A Single Image</a> | <a href='https://github.com/YvanYin/Metric3D'>Github Repo</a></p>"

 examples = [
-    #["turtle.jpg"],
-    #["lions.jpg"]
-    #["files/gundam.jpg"],
     ["files/museum.jpg"],
     ["files/terra.jpg"],
     ["files/underwater.jpg"],
     ["files/venue.jpg"]
 ]

-
-
-
-
-
+def run_demo():
+
+    _TITLE = '''Metric3Dv2: A versatile monocular geometric foundation model for zero-shot metric depth and surface normal estimation'''
+    _DESCRIPTION = description
+
+    with gr.Blocks(title=_TITLE) as demo:
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown('# ' + _TITLE)
+                gr.Markdown(_DESCRIPTION)
+        with gr.Row(variant='panel'):
+            with gr.Column(scale=1):
+                #input_image = gr.Image(type='pil', label='Original Image')
+                input_image = gr.Image(type='filepath', height=300, label='Input image')
+
+                example_folder = os.path.join(os.path.dirname(__file__), "./files")
+                example_fns = [os.path.join(example_folder, example) for example in os.listdir(example_folder)]
+                gr.Examples(
+                    examples=example_fns,
+                    inputs=[input_image],
+                    cache_examples=False,
+                    label='Examples (click one of the images below to start)',
+                    examples_per_page=30
+                )
+
+                model_choice = gr.Dropdown(["vit-small", "vit-large"], label="Model", info="Select a model type", value="vit-small")
+                run_btn = gr.Button('Predict', variant='primary', interactive=True)
+
+            with gr.Column(scale=1):
+                depth = gr.Image(interactive=False, label="Depth")
+                normal = gr.Image(interactive=False, label="Normal")
+
+                with gr.Row():
+                    files = gr.Files(
+                        label = "Depth and Normal (numpy)",
+                        elem_id = "download",
+                        interactive=False,
+                    )
+
+                with gr.Row():
+                    recon_btn = gr.Button('Is focal length available? If Yes, Enter and Click Here for Metric 3D Reconstruction', variant='primary', interactive=True)
+                    focal_length = gr.Number(value=1000, label="Focal Length")
+
+                with gr.Row():
+                    reconstructed_file = gr.Files(
+                        label = "3D pointclouds (plyfile)",
+                        elem_id = "download",
+                        interactive=False
+                    )
+
+        run_btn.click(fn=depth_normal,
+                      inputs=[input_image,
+                              model_choice],
+                      outputs=[depth, normal, files]
+                      )
+        recon_btn.click(fn=reconstruction,
+                        inputs=[input_image, files, focal_length],
+                        outputs=[reconstructed_file]
+                        )
+
+    demo.queue().launch(share=True, max_threads=80)
+
+
+if __name__ == '__main__':
+    fire.Fire(run_demo)
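Note on the new reconstruction() path: the saved depth map corresponds to the default 1000 px focal length used in depth_normal(), so reconstruction() rescales it by focal_length / 1000 before back-projecting, and reconstruct_pcd / save_point_cloud are imported from the Metric3D repo rather than defined in this file. Below is a minimal sketch of the pinhole back-projection that step presumably performs, assuming reconstruct_pcd takes (depth, fx, fy, cx, cy) as in the call above; the repo's actual implementation may differ, and the output path used in the usage lines is only illustrative.

import numpy as np

def backproject_depth(depth, fx, fy, cx, cy):
    # Pinhole model: X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy, Z = depth.
    # depth is an (H, W) metric depth map; the result is an (H, W, 3) point map.
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    return np.stack([x, y, depth], axis=-1)

# Usage mirroring the call in reconstruction(): the stored depth was predicted
# under the canonical 1000 px focal length, so it is rescaled by the real one.
depth_np = np.load("outs/museum20240101-120000/museum_depth.npy")  # hypothetical saved output
focal_length = 1500.0                                              # user-entered focal length in pixels
metric_depth = depth_np * focal_length / 1000
h, w = metric_depth.shape
points = backproject_depth(metric_depth, focal_length, focal_length, w / 2, h / 2)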