add examples models
This commit adds three new example inputs (color image, person mask, object mask, plus body keypoints for 002446), an object-category dropdown wired to fine-tuned checkpoints reloaded from the xiexh20/HDM-models hub repo, and an output log box in the demo UI.

Files changed:
- app.py (+51 −26)
- demo.py (+9 −2)
- examples/002446/k1.color.jpg (added, binary)
- examples/002446/k1.color.json (+79 −0)
- examples/002446/k1.obj_rend_mask.png (added, binary)
- examples/002446/k1.person_mask.png (added, binary)
- examples/053431/k1.color.jpg (added, binary)
- examples/053431/k1.obj_rend_mask.png (added, binary)
- examples/053431/k1.person_mask.png (added, binary)
- examples/158107/k1.color.jpg (added, binary)
- examples/158107/k1.obj_rend_mask.png (added, binary)
- examples/158107/k1.person_mask.png (added, binary)
app.py (CHANGED):

```diff
@@ -22,6 +22,7 @@ import imageio
 import gradio as gr
 import plotly.graph_objs as go
 import training_utils
+import traceback
 
 from configs.structured import ProjectConfig
 from demo import DemoRunner
@@ -91,7 +92,7 @@ def plot_points(colors, coords):
     return fig
 
 
-def inference(runner: DemoRunner, cfg: ProjectConfig, rgb, mask_hum, mask_obj, std_coverage, input_seed):
+def inference(runner: DemoRunner, cfg: ProjectConfig, rgb, mask_hum, mask_obj, std_coverage, input_seed, input_cls):
     """
     given user input, run inference
     :param runner:
@@ -101,26 +102,38 @@ def inference(runner: DemoRunner, cfg: ProjectConfig, rgb, mask_hum, mask_obj, s
     :param mask_obj: (h, w, 3), np array
     :param std_coverage: float value, used to estimate camera translation
     :param input_seed: random seed
+    :param input_cls: the object category of the input image
     :return: path to the 3D reconstruction, and an interactive 3D figure for visualizing the point cloud
     """
-    … (2 removed lines, not expanded in the source diff view)
+    log = ""
+    try:
+        # Set random seed
+        training_utils.set_seed(int(input_seed))
 
-    … (1 removed line, not expanded in the source diff view)
+        data = DemoDataset([], (cfg.dataset.image_size, cfg.dataset.image_size),
                            std_coverage)
-    … (13 removed lines, not expanded in the source diff view)
+        batch = data.image2batch(rgb, mask_hum, mask_obj)
+
+        if input_cls != 'general':
+            log += f"Reloading fine-tuned checkpoint of category {input_cls}\n"
+            runner.reload_checkpoint(input_cls)
+
+        out_stage1, out_stage2 = runner.forward_batch(batch, cfg)
+        points = out_stage2.points_packed().cpu().numpy()
+        colors = out_stage2.features_packed().cpu().numpy()
+        fig = plot_points(colors, points)
+        # save tmp point cloud
+        outdir = './results'
+        os.makedirs(outdir, exist_ok=True)
+        trimesh.PointCloud(points, colors).export(outdir + f"/pred_std{std_coverage}_seed{input_seed}_stage2_{input_cls}.ply")
+        trimesh.PointCloud(out_stage1.points_packed().cpu().numpy(),
+                           out_stage1.features_packed().cpu().numpy()).export(
+            outdir + f"/pred_std{std_coverage}_seed{input_seed}_stage1_{input_cls}.ply")
+        log += 'Successfully reconstructed the image.'
+    except Exception as e:
+        log = traceback.format_exc()
+
+    return fig, outdir + f"/pred_std{std_coverage}_seed{input_seed}_stage2_{input_cls}.ply", log
 
 
 @hydra.main(config_path='configs', config_name='configs', version_base='1.1')
@@ -129,6 +142,8 @@ def main(cfg: ProjectConfig):
     runner = DemoRunner(cfg)
 
     # runner = None # without model initialization, it shows one line of thumbnail
+    # TODO: add instructions on how to get masks
+    # TODO: add instructions on how to use the demo, input output, example outputs etc.
 
     # Setup interface
     demo = gr.Blocks(title="HDM Interaction Reconstruction Demo")
@@ -147,33 +162,43 @@ def main(cfg: ProjectConfig):
            # TODO: add hint for this value here
            input_std = gr.Number(label='Gaussian std coverage', value=3.5)
            input_seed = gr.Number(label='Random seed', value=42)
+           # TODO: add description outside label
+           input_cls = gr.Dropdown(label='Object category (we have fine tuned the model for specific categories, '
+                                         'reconstructing with these model should lead to better result '
+                                         'for specific categories.) ',
+                                   choices=['general', 'backpack', 'ball', 'bottle', 'box',
+                                            'chair', 'skateboard', 'suitcase', 'table'],
+                                   value='general')
        # Output visualization
        with gr.Row():
            pc_plot = gr.Plot(label="Reconstructed point cloud")
            out_pc_download = gr.File(label="3D reconstruction for download")  # this allows downloading
+       with gr.Row():
+           out_log = gr.TextArea(label='Output log')
+
 
        gr.HTML("""<br/>""")
        # Control
        with gr.Row():
            button_recon = gr.Button("Start Reconstruction", interactive=True, variant='secondary')
        button_recon.click(fn=partial(inference, runner, cfg),
-                          inputs=[input_rgb, input_mask_hum, input_mask_obj, input_std, input_seed],
-                          outputs=[pc_plot, out_pc_download])
+                          inputs=[input_rgb, input_mask_hum, input_mask_obj, input_std, input_seed, input_cls],
+                          outputs=[pc_plot, out_pc_download, out_log])
        gr.HTML("""<br/>""")
        # Example input
        example_dir = cfg.run.code_dir_abs+"/examples"
        rgb, ps, obj = 'k1.color.jpg', 'k1.person_mask.png', 'k1.obj_rend_mask.png'
        example_images = gr.Examples([
-           [f"{example_dir}/017450/{rgb}", f"{example_dir}/017450/{ps}", f"{example_dir}/017450/{obj}", 3.0, 42],
-           [f"{example_dir}/002446/{rgb}", f"{example_dir}/002446/{ps}", f"{example_dir}/002446/{obj}", 3.0, 42],
-           [f"{example_dir}/053431/{rgb}", f"{example_dir}/053431/{ps}", f"{example_dir}/053431/{obj}", 3.8, 42],
-           [f"{example_dir}/158107/{rgb}", f"{example_dir}/158107/{ps}", f"{example_dir}/158107/{obj}", 3.8, 42],
+           [f"{example_dir}/017450/{rgb}", f"{example_dir}/017450/{ps}", f"{example_dir}/017450/{obj}", 3.0, 42, 'skateboard'],
+           [f"{example_dir}/002446/{rgb}", f"{example_dir}/002446/{ps}", f"{example_dir}/002446/{obj}", 3.0, 42, 'ball'],
+           [f"{example_dir}/053431/{rgb}", f"{example_dir}/053431/{ps}", f"{example_dir}/053431/{obj}", 3.8, 42, 'chair'],
+           [f"{example_dir}/158107/{rgb}", f"{example_dir}/158107/{ps}", f"{example_dir}/158107/{obj}", 3.8, 42, 'chair'],
 
-       ], inputs=[input_rgb, input_mask_hum, input_mask_obj, input_std, input_seed],)
+       ], inputs=[input_rgb, input_mask_hum, input_mask_obj, input_std, input_seed, input_cls],)
 
    # demo.launch(share=True)
    # Enabling queue for runtime>60s, see: https://github.com/tloen/alpaca-lora/issues/60#issuecomment-1510006062
-   demo.queue(… (truncated in the source view)
+   demo.queue().launch(share=True)
 
 if __name__ == '__main__':
-    main()
+    main()
```
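One caveat in the new `inference` body: the final `return` references `fig` and `outdir`, which are only assigned inside the `try` block, so a failure early in the pipeline (e.g. a malformed mask) would raise `UnboundLocalError` at the `return` instead of surfacing the captured traceback in the new log box. A minimal sketch of the same try/log pattern with the outputs pre-bound (the helper name and placeholder body are hypothetical, not part of this commit):

```python
import os
import traceback

def inference_sketch(std_coverage, input_seed, input_cls):
    """Hypothetical skeleton of the committed pattern: returns (figure, ply_path, log)."""
    log, fig = "", None  # pre-bind outputs so the except branch can still return them
    out_file = os.path.join('./results',
                            f"pred_std{std_coverage}_seed{input_seed}_stage2_{input_cls}.ply")
    try:
        # ... seeding, batching, two-stage reconstruction, PLY export go here ...
        log += 'Successfully reconstructed the image.'
    except Exception:
        log = traceback.format_exc()  # shown in the gr.TextArea output log
    return fig, out_file, log
```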
demo.py (CHANGED):

```diff
@@ -65,8 +65,8 @@ class DemoRunner:
         self.rend_size = cfg.dataset.image_size
         self.device = 'cuda'
 
-    def load_checkpoint(self, ckpt_file1, model_stage1):
-        checkpoint = torch.load(ckpt_file1, map_location=… (truncated in the source view)
+    def load_checkpoint(self, ckpt_file1, model_stage1, device='cpu'):
+        checkpoint = torch.load(ckpt_file1, map_location=device)
         state_dict, key = checkpoint['model'], 'model'
         if any(k.startswith('module.') for k in state_dict.keys()):
             state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
@@ -78,6 +78,13 @@ class DemoRunner:
         if len(unexpected_keys):
             print(f' - Unexpected_keys: {unexpected_keys}')
 
+    def reload_checkpoint(self, cat_name):
+        "load checkpoint of models fine tuned on specific categories"
+        ckpt_file1 = hf_hub_download("xiexh20/HDM-models", f'{self.cfg.run.stage1_name}-{cat_name}.pth')
+        self.load_checkpoint(ckpt_file1, self.model_stage1, device=self.device)
+        ckpt_file2 = hf_hub_download("xiexh20/HDM-models", f'{self.cfg.run.stage2_name}-{cat_name}.pth')
+        self.load_checkpoint(ckpt_file2, self.model_stage2, device=self.device)
+
     @torch.no_grad()
     def run(self):
         "simply run the demo on given images, and save the results"
```
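The new `reload_checkpoint` resolves checkpoint files through `huggingface_hub`'s `hf_hub_download`, which caches downloads locally, so switching categories in the demo fetches each fine-tuned checkpoint at most once. A stand-alone sketch of the same lookup (the concrete stage names below are assumptions; in the code they come from `cfg.run.stage1_name` / `cfg.run.stage2_name`):

```python
from huggingface_hub import hf_hub_download

# Checkpoints are published as '<stage_name>-<category>.pth' in the model repo.
stage1_name, stage2_name = "hdm-stage1", "hdm-stage2"  # assumed values, not from the commit
category = "chair"  # one of the dropdown choices

ckpt1 = hf_hub_download("xiexh20/HDM-models", f"{stage1_name}-{category}.pth")
ckpt2 = hf_hub_download("xiexh20/HDM-models", f"{stage2_name}-{category}.pth")
print(ckpt1, ckpt2)  # paths into the local HF cache; repeated calls reuse the cache
```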
examples/002446/k1.color.jpg (ADDED, binary)

examples/002446/k1.color.json (ADDED):

```diff
@@ -0,0 +1,79 @@
+{
+    "body_joints": [
+        362.91015625,
+        159.39576721191406,
+        0.9023686647415161,
+        373.57745361328125,
+        180.60316467285156,
+        0.8592674136161804,
+        333.528564453125,
+        179.45702362060547,
+        0.7867028713226318,
+        278.2209167480469,
+        207.63121032714844,
+        0.8840203285217285,
+        228.78005981445312,
+        234.69793701171875,
+        0.8324164152145386,
+        417.08209228515625,
+        181.77294921875,
+        0.7164953947067261,
+        477.138427734375,
+        199.3846893310547,
+        0.7733086347579956,
+        539.4710083007812,
+        219.44891357421875,
+        0.8321817517280579,
+        401.8182678222656,
+        288.8574676513672,
+        0.61277836561203,
+        382.9984436035156,
+        294.7460632324219,
+        0.5884051322937012,
+        388.8341979980469,
+        377.1164245605469,
+        0.8282020092010498,
+        488.86529541015625,
+        404.145751953125,
+        0.6257187724113464,
+        420.6218566894531,
+        282.9443664550781,
+        0.5774698257446289,
+        455.9610290527344,
+        361.8221130371094,
+        0.8058001399040222,
+        557.13916015625,
+        339.43017578125,
+        0.69627445936203,
+        352.3575134277344,
+        151.14682006835938,
+        0.9335765242576599,
+        371.185791015625,
+        146.48798370361328,
+        0.8626495003700256,
+        342.9620666503906,
+        150.00089263916016,
+        0.0641486719250679,
+        390.03204345703125,
+        135.8568878173828,
+        0.8869808316230774,
+        595.938720703125,
+        338.2825012207031,
+        0.25365617871284485,
+        594.7731323242188,
+        334.75506591796875,
+        0.23056654632091522,
+        561.8401489257812,
+        331.20794677734375,
+        0.29395991563796997,
+        484.1672058105469,
+        435.9705810546875,
+        0.6335450410842896,
+        479.44921875,
+        433.6032409667969,
+        0.5307492017745972,
+        501.7928466796875,
+        398.28533935546875,
+        0.5881072878837585
+    ]
+}
```
examples/002446/k1.obj_rend_mask.png (ADDED, binary)
examples/002446/k1.person_mask.png (ADDED, binary)
examples/053431/k1.color.jpg (ADDED, binary)
examples/053431/k1.obj_rend_mask.png (ADDED, binary)
examples/053431/k1.person_mask.png (ADDED, binary)
examples/158107/k1.color.jpg (ADDED, binary)
examples/158107/k1.obj_rend_mask.png (ADDED, binary)
examples/158107/k1.person_mask.png (ADDED, binary)