import argparse
import numpy as np
import gradio as gr
import torch
import torch.nn.functional as F
import open3d as o3d
import time
import cv2
import math
import os
import yaml
import shutil
import sys
import glob
from functools import partial
import copy
from PIL import Image


class DynamicsVisualizer:
    """Gradio front end that replays precomputed Gaussian-Splat results.

    Nothing is computed at click time: every handler only reads files from
    ``data/<example_id>/`` (initial images and ``gs_orig.splat``) and
    ``data/<example_id>/action-<action_id>/`` (action images, videos and
    ``gs_pred.splat``) and hands them to the Gradio components.

    NOTE(review): all selection state is stored on this one instance and the
    handlers are bound methods of it, so every browser session connected to
    the app reads and writes the same state. Moving the state into
    ``gr.State`` would isolate sessions; that refactor is not done here.
    """

    _DATA_ROOT = 'data'
    _NUM_VIEWS = 4

    _IMAGE_LABEL = 'Initial state and actions'
    _VIDEO_LABEL = 'Predicted video'
    _GS_ORIG_LABEL = 'Original Gaussian Splats'
    _GS_PRED_LABEL = 'Predicted Gaussian Splats'

    _EXAMPLE_CAPTIONS = ('Rope', 'Rope - Long', 'Toy Animal')
    _ACTION_CAPTIONS = ('Action 1', 'Action 2', 'Action 3')
    _VIEW_CAPTIONS = ('View 0', 'View 1', 'View 2', 'View 3')
    # Camera index sent by each "View N" button. The buttons are shifted by
    # one relative to the camera ids, which makes "View 0" match the default
    # ``vis_cam_id`` of 1. NOTE(review): presumably an intentional
    # re-ordering of the cameras -- confirm before "fixing" it.
    _VIEW_CAMERA_IDS = (1, 2, 3, 0)

    def __init__(self):
        self.device = torch.device("cpu")
        self.width = 640
        self.height = 480
        self._reset_state()

    # ------------------------------------------------------------------
    # State and component helpers
    # ------------------------------------------------------------------

    def _reset_state(self):
        """Put every selection/bookkeeping attribute back to its default."""
        self.vis_cam_id = 1
        self.bg_id = 0  # 0: black, 1: white

        self.imgs = None
        self.gs_orig = None
        self.gs_pred = None
        self.actions = None
        self.videos = None

        self.example_name = None
        self.action_name = None

        self.form_image_is_set = False
        self.form_video_is_set = False
        self.form_3dgs_orig_is_set = False
        self.form_3dgs_pred_is_set = False

    def _load_view_images(self, directory):
        """Open ``img_0.png`` .. ``img_3.png`` from *directory*, one per camera."""
        return [Image.open(os.path.join(directory, f'img_{i}.png'))
                for i in range(self._NUM_VIEWS)]

    def _image_form(self, value=None):
        """Build the image component; ``value=None`` yields an empty one."""
        return gr.Image(label=self._IMAGE_LABEL, value=value,
                        width=self.width, height=self.height)

    def _video_form(self, value=None):
        """Build the video component; ``value=None`` yields an empty one."""
        return gr.Video(label=self._VIDEO_LABEL, value=value,
                        width=self.width, height=self.height)

    def _splat_form(self, label, value=None):
        """Build a Model3D component for a ``.splat`` file path.

        The background colour is only passed when there is something to
        show, matching how the empty components are created.
        """
        if value is None:
            return gr.Model3D(label=label, value=None)
        return gr.Model3D(
            label=label,
            value=value,
            clear_color=[self.bg_id, self.bg_id, self.bg_id, 0],
        )

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------

    def load_example(self):
        """Load the initial images and splat path of ``self.example_name``."""
        example_path = os.path.join(self._DATA_ROOT, self.example_name)

        self.imgs = self._load_view_images(example_path)
        self.gs_orig = os.path.join(example_path, 'gs_orig.splat')

    def load_action(self):
        """Load images, video paths and predicted splat of ``self.action_name``."""
        action_path = os.path.join(self._DATA_ROOT, self.action_name)

        self.imgs = self._load_view_images(action_path)
        self.videos = [os.path.join(action_path, f'video_{i}.mp4')
                       for i in range(self._NUM_VIEWS)]
        self.gs_pred = os.path.join(action_path, 'gs_pred.splat')

    # ------------------------------------------------------------------
    # Click handlers
    # ------------------------------------------------------------------

    def reset(self):
        """Clear all state and return four empty components.

        Returns:
            ``(image, video, original splats, predicted splats)``.
        """
        self._reset_state()
        return (
            self._image_form(),
            self._video_form(),
            self._splat_form(self._GS_ORIG_LABEL),
            self._splat_form(self._GS_PRED_LABEL),
        )

    def on_click_set_example(self, state):
        """Select an object and show its initial image and original splats.

        Args:
            state: dict with key ``'example_id'``.

        Returns:
            ``(image, video, original splats, predicted splats)``; the video
            and the predicted splats are emptied.
        """
        self.example_name = f"{int(state['example_id'])}"

        # Forget the action of the previously selected object. Otherwise a
        # later "Run" would display that object's prediction next to the
        # newly selected one.
        self.action_name = None
        self.videos = None
        self.gs_pred = None

        self.load_example()

        self.form_image_is_set = True
        self.form_video_is_set = False
        self.form_3dgs_orig_is_set = True
        self.form_3dgs_pred_is_set = False

        return (
            self._image_form(self.imgs[self.vis_cam_id]),
            self._video_form(),
            self._splat_form(self._GS_ORIG_LABEL, self.gs_orig),
            self._splat_form(self._GS_PRED_LABEL),
        )

    def on_click_set_action(self, state):
        """Select an action for the current object and show its image.

        Args:
            state: dict with key ``'action_id'``.

        Returns:
            The image component. It is empty when no object has been
            selected yet, because there is no directory to load from.
        """
        if self.example_name is None:
            # Without an object the path would become "None/action-<id>"
            # and loading would fail with FileNotFoundError.
            return self._image_form()

        self.action_name = f"{self.example_name}/action-{int(state['action_id'])}"
        self.load_action()

        self.form_image_is_set = True
        return self._image_form(self.imgs[self.vis_cam_id])

    def on_click_run(self):
        """Show the precomputed video and predicted splats of the action.

        Returns:
            ``(video, predicted splats)``; both empty when no action has
            been selected yet.
        """
        if self.videos is None or self.gs_pred is None:
            # "Run" was clicked before an action was chosen: nothing to show.
            return self._video_form(), self._splat_form(self._GS_PRED_LABEL)

        self.form_video_is_set = True
        self.form_3dgs_pred_is_set = True

        return (
            self._video_form(self.videos[self.vis_cam_id]),
            self._splat_form(self._GS_PRED_LABEL, self.gs_pred),
        )

    def on_click_change_view(self, state):
        """Switch the displayed camera.

        Args:
            state: dict with key ``'view_id'`` (camera index).

        Returns:
            ``(image, video)`` for the new camera. The image is empty when
            nothing is loaded; the video is only filled after "Run".
        """
        self.vis_cam_id = int(state['view_id'])

        image = self.imgs[self.vis_cam_id] if self.imgs is not None else None
        show_video = self.form_video_is_set and self.videos is not None
        video = self.videos[self.vis_cam_id] if show_video else None

        return self._image_form(image), self._video_form(video)

    # ------------------------------------------------------------------
    # UI
    # ------------------------------------------------------------------

    def launch(self, share=False):
        """Build the Blocks layout, wire the callbacks and start the server.

        Args:
            share: forwarded to ``gr.Blocks.launch``.
        """
        with gr.Blocks() as app:

            with gr.Row():
                gr.Markdown("# Dynamic 3D Gaussian Tracking for Graph-Based Neural Dynamics Modeling")

            with gr.Row():
                gr.Markdown('Project page: [https://gs-dynamics.github.io/](https://gs-dynamics.github.io/)')

            # Empty Markdown rows are used as vertical spacers.
            for _ in range(2):
                with gr.Row():
                    gr.Markdown()

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("**Step 0**: click **Clear All** to clear all window and reset the visualizer.")

                with gr.Column(scale=1):
                    run_reset = gr.Button('Clear All')

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("**Step 1**: select the object.")

                example_buttons = []
                for caption in self._EXAMPLE_CAPTIONS:
                    with gr.Column(scale=1):
                        example_buttons.append(gr.Button(caption))

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("**Step 2**: select the action.")

                action_buttons = []
                for caption in self._ACTION_CAPTIONS:
                    with gr.Column(scale=1):
                        action_buttons.append(gr.Button(caption))

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("**Step 3**: click **Run** to visualize the predicted video and Splats.")

                with gr.Column(scale=1):
                    run_run = gr.Button('Run')

            with gr.Row():
                # Narrow left column: view selector.
                with gr.Column(scale=1, min_width=20):
                    for _ in range(4):
                        with gr.Row():
                            gr.Markdown()

                    with gr.Row():
                        gr.Markdown("Our model uses only 4 cameras for reconstructing the Gaussian Splats. Click the buttons below to change the view.")

                    view_buttons = []
                    for caption in self._VIEW_CAPTIONS:
                        with gr.Row():
                            view_buttons.append(gr.Button(caption))

                # Wide right column: image/video on top, splat viewers below.
                with gr.Column(scale=4):
                    with gr.Row():
                        with gr.Column(scale=2):
                            form_image = self._image_form()

                        with gr.Column(scale=2):
                            form_video = self._video_form()

                    with gr.Row():
                        with gr.Column(scale=2):
                            form_3dgs_orig = self._splat_form(self._GS_ORIG_LABEL)

                        with gr.Column(scale=2):
                            form_3dgs_pred = self._splat_form(self._GS_PRED_LABEL)

            with gr.Row():
                gr.Markdown("## Notes:")

            with gr.Row():
                gr.Markdown("- Due to the computation constraints of Hugging Face Space, all results are precomputed. ")

            with gr.Row():
                gr.Markdown("- Training a GS for an object takes around 30 seconds. Prediction typically takes only 1-2 seconds for each push!")

            with gr.Row():
                gr.Markdown("- More examples may be added in the future. Stay tuned!")

            # Set up callbacks

            all_forms = [form_image, form_video, form_3dgs_orig, form_3dgs_pred]

            run_reset.click(self.reset, inputs=[], outputs=all_forms)

            for example_id, button in enumerate(example_buttons):
                button.click(self.on_click_set_example,
                             inputs=[gr.State({'example_id': example_id})],
                             outputs=all_forms)

            for action_id, button in enumerate(action_buttons):
                button.click(self.on_click_set_action,
                             inputs=[gr.State({'action_id': action_id})],
                             outputs=[form_image])

            run_run.click(self.on_click_run,
                          inputs=[],
                          outputs=[form_video, form_3dgs_pred])

            for camera_id, button in zip(self._VIEW_CAMERA_IDS, view_buttons):
                button.click(self.on_click_change_view,
                             inputs=[gr.State({'view_id': camera_id})],
                             outputs=[form_image, form_video])

        app.launch(share=share)


if __name__ == '__main__':
    visualizer = DynamicsVisualizer()
    visualizer.launch(share=True)
0000000000000000000000000000000000000000..2c7f49981965fa8f67a87a50c591956652c4eedc Binary files /dev/null and b/data/0/action-0/video_2.mp4 differ diff --git a/data/0/action-0/video_3.mp4 b/data/0/action-0/video_3.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0acb6b942f6481598ee1a2b49d06ff814576c82e Binary files /dev/null and b/data/0/action-0/video_3.mp4 differ diff --git a/data/0/action-1/gs_pred.splat b/data/0/action-1/gs_pred.splat new file mode 100644 index 0000000000000000000000000000000000000000..656dcf34bc874ff01704fdbfc83f65ee1e62bd07 Binary files /dev/null and b/data/0/action-1/gs_pred.splat differ diff --git a/data/0/action-1/img_0.png b/data/0/action-1/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..490a81a5e1753cb54ceff8c3649a0dea997cc81d Binary files /dev/null and b/data/0/action-1/img_0.png differ diff --git a/data/0/action-1/img_1.png b/data/0/action-1/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..b7a63f78c6d05933dbdb9407ec17edc0f99d9083 Binary files /dev/null and b/data/0/action-1/img_1.png differ diff --git a/data/0/action-1/img_2.png b/data/0/action-1/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..f5344a1f1a6652c7f3b477f788ceac17c813e63c Binary files /dev/null and b/data/0/action-1/img_2.png differ diff --git a/data/0/action-1/img_3.png b/data/0/action-1/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..792334afcc4388ccb0b8ddf74090d008d74fe5e6 Binary files /dev/null and b/data/0/action-1/img_3.png differ diff --git a/data/0/action-1/video_0.mp4 b/data/0/action-1/video_0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5a0f3e0187a1146bd1076364dde21b9065db8fff Binary files /dev/null and b/data/0/action-1/video_0.mp4 differ diff --git a/data/0/action-1/video_1.mp4 b/data/0/action-1/video_1.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..97bbde352d980854e2a3cc970da1a2c40f746cb1 Binary files /dev/null and b/data/0/action-1/video_1.mp4 differ diff --git a/data/0/action-1/video_2.mp4 b/data/0/action-1/video_2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..07a16f3f15a0dea7d95a9323a7443db1d28f0a3e Binary files /dev/null and b/data/0/action-1/video_2.mp4 differ diff --git a/data/0/action-1/video_3.mp4 b/data/0/action-1/video_3.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bb736653099a6167724d0997edf6b4b549399ca8 Binary files /dev/null and b/data/0/action-1/video_3.mp4 differ diff --git a/data/0/action-2/gs_pred.splat b/data/0/action-2/gs_pred.splat new file mode 100644 index 0000000000000000000000000000000000000000..c3111509bd1d8169585fffdc9e8a9ae001c2e738 Binary files /dev/null and b/data/0/action-2/gs_pred.splat differ diff --git a/data/0/action-2/img_0.png b/data/0/action-2/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..b924218cc0e246cb9291d3968623086984eca404 Binary files /dev/null and b/data/0/action-2/img_0.png differ diff --git a/data/0/action-2/img_1.png b/data/0/action-2/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..75f47d2fb9142d891f665aa4672a3697e6d7c899 Binary files /dev/null and b/data/0/action-2/img_1.png differ diff --git a/data/0/action-2/img_2.png b/data/0/action-2/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..c556d9ef44edce7e5224744b4a4f16b0b9cb32e2 Binary files /dev/null and b/data/0/action-2/img_2.png differ diff --git a/data/0/action-2/img_3.png b/data/0/action-2/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..41f3c61302fc7d7d6d0eb6fe110e2e8ed3c66a90 Binary files /dev/null and b/data/0/action-2/img_3.png differ diff --git a/data/0/action-2/video_0.mp4 b/data/0/action-2/video_0.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..5b7bd721f3bf0470cbc1f84ba66960f300af4665 Binary files /dev/null and b/data/0/action-2/video_0.mp4 differ diff --git a/data/0/action-2/video_1.mp4 b/data/0/action-2/video_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..fdfe51058e4bce02b9a5c683c9946c6b9b350c29 Binary files /dev/null and b/data/0/action-2/video_1.mp4 differ diff --git a/data/0/action-2/video_2.mp4 b/data/0/action-2/video_2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4225ff560943a0b395de7e55383d053c9e513822 Binary files /dev/null and b/data/0/action-2/video_2.mp4 differ diff --git a/data/0/action-2/video_3.mp4 b/data/0/action-2/video_3.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..635adc6a5dec54d939d0d6971a228c4bcbd4c5b0 Binary files /dev/null and b/data/0/action-2/video_3.mp4 differ diff --git a/data/0/gs_orig.splat b/data/0/gs_orig.splat new file mode 100644 index 0000000000000000000000000000000000000000..b25224107dceb3e956feae84ec5c67ada357bfca Binary files /dev/null and b/data/0/gs_orig.splat differ diff --git a/data/0/img_0.png b/data/0/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..c623ff313ad189a7155bf52df64db6bb08e0e01f Binary files /dev/null and b/data/0/img_0.png differ diff --git a/data/0/img_1.png b/data/0/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..c49073fb3f249f8b5d18174dab463235021f7743 Binary files /dev/null and b/data/0/img_1.png differ diff --git a/data/0/img_2.png b/data/0/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..c8395f7b12fd759a267bbe08fc2410ea120c65f4 Binary files /dev/null and b/data/0/img_2.png differ diff --git a/data/0/img_3.png b/data/0/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..68745414543645e7cd38340211e709ad3228de4d Binary files /dev/null and b/data/0/img_3.png differ diff --git a/data/1/action-0/gs_pred.splat 
b/data/1/action-0/gs_pred.splat new file mode 100644 index 0000000000000000000000000000000000000000..87b22404a1052ceeab891557202198d64d056510 Binary files /dev/null and b/data/1/action-0/gs_pred.splat differ diff --git a/data/1/action-0/img_0.png b/data/1/action-0/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..ad956e51daf4025f524b795d5d8da23932c5d709 Binary files /dev/null and b/data/1/action-0/img_0.png differ diff --git a/data/1/action-0/img_1.png b/data/1/action-0/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..a12eb678e16cc504d85bd63671e93d781b5e7dfa Binary files /dev/null and b/data/1/action-0/img_1.png differ diff --git a/data/1/action-0/img_2.png b/data/1/action-0/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..1c8629f8a477005943ecb2317f53adaac6b59a3f Binary files /dev/null and b/data/1/action-0/img_2.png differ diff --git a/data/1/action-0/img_3.png b/data/1/action-0/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..8d03a7c89c398fecd101eb1aefa687e21a15e9df Binary files /dev/null and b/data/1/action-0/img_3.png differ diff --git a/data/1/action-0/video_0.mp4 b/data/1/action-0/video_0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..01e57f84ab8f21e4d9ad17e703231f4327b32122 Binary files /dev/null and b/data/1/action-0/video_0.mp4 differ diff --git a/data/1/action-0/video_1.mp4 b/data/1/action-0/video_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..19b9138bd91ec4aa9e6740b88d232a3c2dc4d71b Binary files /dev/null and b/data/1/action-0/video_1.mp4 differ diff --git a/data/1/action-0/video_2.mp4 b/data/1/action-0/video_2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..894b068dff8144b660708abd8d4c0413e50d8a4d Binary files /dev/null and b/data/1/action-0/video_2.mp4 differ diff --git a/data/1/action-0/video_3.mp4 b/data/1/action-0/video_3.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..5d4cda9b585bbe5abb1f962fb826d894fde89856 Binary files /dev/null and b/data/1/action-0/video_3.mp4 differ diff --git a/data/1/action-1/gs_pred.splat b/data/1/action-1/gs_pred.splat new file mode 100644 index 0000000000000000000000000000000000000000..6ccb8fb57b1bc32295cd854b0fc910d546e73eb0 Binary files /dev/null and b/data/1/action-1/gs_pred.splat differ diff --git a/data/1/action-1/img_0.png b/data/1/action-1/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..13d2914bcf0998c02123cb98af2e516d19d27111 Binary files /dev/null and b/data/1/action-1/img_0.png differ diff --git a/data/1/action-1/img_1.png b/data/1/action-1/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..c87d6fc98497b172e6ae07edb89a77413d295ff4 Binary files /dev/null and b/data/1/action-1/img_1.png differ diff --git a/data/1/action-1/img_2.png b/data/1/action-1/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..c280eabb16690a5ba1ba1ab35f69704bc718fdc8 Binary files /dev/null and b/data/1/action-1/img_2.png differ diff --git a/data/1/action-1/img_3.png b/data/1/action-1/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..22bea4aa11fa72281b63c1d6f12e85fd3e929abb Binary files /dev/null and b/data/1/action-1/img_3.png differ diff --git a/data/1/action-1/video_0.mp4 b/data/1/action-1/video_0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f2c5553f9dcc8a6219c218f64dc0bf3efc473423 Binary files /dev/null and b/data/1/action-1/video_0.mp4 differ diff --git a/data/1/action-1/video_1.mp4 b/data/1/action-1/video_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..18d578f9db8cc79c58a0ea7764dddb8c6632a231 Binary files /dev/null and b/data/1/action-1/video_1.mp4 differ diff --git a/data/1/action-1/video_2.mp4 b/data/1/action-1/video_2.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..6f7bab7669493f2ad0fb1e52961fe598167e67ad Binary files /dev/null and b/data/1/action-1/video_2.mp4 differ diff --git a/data/1/action-1/video_3.mp4 b/data/1/action-1/video_3.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0140057daf77f9b52caab3ada0eec9b39391cd50 Binary files /dev/null and b/data/1/action-1/video_3.mp4 differ diff --git a/data/1/action-2/gs_pred.splat b/data/1/action-2/gs_pred.splat new file mode 100644 index 0000000000000000000000000000000000000000..b032dbbe1eca98445c86c697ff8cb32beaf08c33 Binary files /dev/null and b/data/1/action-2/gs_pred.splat differ diff --git a/data/1/action-2/img_0.png b/data/1/action-2/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7a4c559fd093ff379f1d340d02cd9577e0b7156d Binary files /dev/null and b/data/1/action-2/img_0.png differ diff --git a/data/1/action-2/img_1.png b/data/1/action-2/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..007888f8ddf2fe994696c5b2df4a24ad38a5a5b6 Binary files /dev/null and b/data/1/action-2/img_1.png differ diff --git a/data/1/action-2/img_2.png b/data/1/action-2/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..64502059fbe951c121d82ec374796c5ec948449f Binary files /dev/null and b/data/1/action-2/img_2.png differ diff --git a/data/1/action-2/img_3.png b/data/1/action-2/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..e44c08e3bc97bc7611a9d16e5a15bd81cf77a68a Binary files /dev/null and b/data/1/action-2/img_3.png differ diff --git a/data/1/action-2/video_0.mp4 b/data/1/action-2/video_0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b681b20416cd5945247fba1c0bf7398d4d333012 Binary files /dev/null and b/data/1/action-2/video_0.mp4 differ diff --git a/data/1/action-2/video_1.mp4 b/data/1/action-2/video_1.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..a927d3e12a62d12d3f23051b3868fa2a67ee42ed Binary files /dev/null and b/data/1/action-2/video_1.mp4 differ diff --git a/data/1/action-2/video_2.mp4 b/data/1/action-2/video_2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6376b42006d5f27887c6c49328df8e00a84c33c3 Binary files /dev/null and b/data/1/action-2/video_2.mp4 differ diff --git a/data/1/action-2/video_3.mp4 b/data/1/action-2/video_3.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2ecdc0a94f04dfadd60487c3c0041b293fa8907f Binary files /dev/null and b/data/1/action-2/video_3.mp4 differ diff --git a/data/1/gs_orig.splat b/data/1/gs_orig.splat new file mode 100644 index 0000000000000000000000000000000000000000..d0c4a38eaa06c370ec2ed96e8907046b9730c962 Binary files /dev/null and b/data/1/gs_orig.splat differ diff --git a/data/1/img_0.png b/data/1/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..b711a249b91302e14aa06ac9a4584e01a241cfbd Binary files /dev/null and b/data/1/img_0.png differ diff --git a/data/1/img_1.png b/data/1/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..d288d20237e487f91e81ce7b7d01c117bb9392ab Binary files /dev/null and b/data/1/img_1.png differ diff --git a/data/1/img_2.png b/data/1/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..67f345f03557a6f3371a77d266d56ca80b79ed46 Binary files /dev/null and b/data/1/img_2.png differ diff --git a/data/1/img_3.png b/data/1/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..65ab53de74158c947b98c680261670c91a1e6856 Binary files /dev/null and b/data/1/img_3.png differ diff --git a/data/2/action-0/gs_pred.splat b/data/2/action-0/gs_pred.splat new file mode 100644 index 0000000000000000000000000000000000000000..b45ab2c5f6195b61ef0051abb567e6a95c09444b Binary files /dev/null and b/data/2/action-0/gs_pred.splat differ diff --git a/data/2/action-0/img_0.png 
b/data/2/action-0/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d62940e898cad147b1790e7dced361b8399cbb35 Binary files /dev/null and b/data/2/action-0/img_0.png differ diff --git a/data/2/action-0/img_1.png b/data/2/action-0/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..994a4541f75ff9f96aa03495e86a01d4a8668e4b Binary files /dev/null and b/data/2/action-0/img_1.png differ diff --git a/data/2/action-0/img_2.png b/data/2/action-0/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..06157cd83a37422b8bef616510a16289c63c1175 Binary files /dev/null and b/data/2/action-0/img_2.png differ diff --git a/data/2/action-0/img_3.png b/data/2/action-0/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..acf220b3d2bc5cbacf2a6d8e29e53f58f23da51d Binary files /dev/null and b/data/2/action-0/img_3.png differ diff --git a/data/2/action-0/video_0.mp4 b/data/2/action-0/video_0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2640a134acb52148b12aff7c20f0ac36b8026934 Binary files /dev/null and b/data/2/action-0/video_0.mp4 differ diff --git a/data/2/action-0/video_1.mp4 b/data/2/action-0/video_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..cdc84a55f87f207f5d915728df8718ef12dd62c5 Binary files /dev/null and b/data/2/action-0/video_1.mp4 differ diff --git a/data/2/action-0/video_2.mp4 b/data/2/action-0/video_2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..598ab77ca49ec4ea6e985ef0c9ac9462d03c2063 Binary files /dev/null and b/data/2/action-0/video_2.mp4 differ diff --git a/data/2/action-0/video_3.mp4 b/data/2/action-0/video_3.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..33420ba14e4177fc26635cc64870090448f04b41 Binary files /dev/null and b/data/2/action-0/video_3.mp4 differ diff --git a/data/2/action-1/gs_pred.splat b/data/2/action-1/gs_pred.splat new file mode 100644 index 
0000000000000000000000000000000000000000..188221bc9179a2b8da4212e5accd023097d21f99 Binary files /dev/null and b/data/2/action-1/gs_pred.splat differ diff --git a/data/2/action-1/img_0.png b/data/2/action-1/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8be0aeffdbbd32d1feb3c4cb24d3ccdeae34af80 Binary files /dev/null and b/data/2/action-1/img_0.png differ diff --git a/data/2/action-1/img_1.png b/data/2/action-1/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..7c6f1af1826f0a89437634953611070ce243dd48 Binary files /dev/null and b/data/2/action-1/img_1.png differ diff --git a/data/2/action-1/img_2.png b/data/2/action-1/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..f300c1cd4614d177e836c1af5e963f1a85bab06f Binary files /dev/null and b/data/2/action-1/img_2.png differ diff --git a/data/2/action-1/img_3.png b/data/2/action-1/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..4c3547963069e5f18f854bdee87a55bf022bc151 Binary files /dev/null and b/data/2/action-1/img_3.png differ diff --git a/data/2/action-1/video_0.mp4 b/data/2/action-1/video_0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9b93e590cb0ead807534ed7e0d5f86b322364f4b Binary files /dev/null and b/data/2/action-1/video_0.mp4 differ diff --git a/data/2/action-1/video_1.mp4 b/data/2/action-1/video_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2907ec49d70a735242177c4f3e862bba3bbd6b2c Binary files /dev/null and b/data/2/action-1/video_1.mp4 differ diff --git a/data/2/action-1/video_2.mp4 b/data/2/action-1/video_2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7e8831c9a152159d2a7582099d25abca8fac094e Binary files /dev/null and b/data/2/action-1/video_2.mp4 differ diff --git a/data/2/action-1/video_3.mp4 b/data/2/action-1/video_3.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..98fc41303d4f752a6d78cc56ea43964faa3a0f5e Binary files /dev/null and b/data/2/action-1/video_3.mp4 differ diff --git a/data/2/action-2/gs_pred.splat b/data/2/action-2/gs_pred.splat new file mode 100644 index 0000000000000000000000000000000000000000..a98897efe5ff230ba5e1a3eeb171d20dff3f2287 Binary files /dev/null and b/data/2/action-2/gs_pred.splat differ diff --git a/data/2/action-2/img_0.png b/data/2/action-2/img_0.png new file mode 100644 index 0000000000000000000000000000000000000000..fbb0ae28a7918ccb2d9240add45c4a0a1be6574f Binary files /dev/null and b/data/2/action-2/img_0.png differ diff --git a/data/2/action-2/img_1.png b/data/2/action-2/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9144ee65c3e777efcdb745e5d1feee5073ce758d Binary files /dev/null and b/data/2/action-2/img_1.png differ diff --git a/data/2/action-2/img_2.png b/data/2/action-2/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..04921c2074507331487f1e896f706f8a1f553ed3 Binary files /dev/null and b/data/2/action-2/img_2.png differ diff --git a/data/2/action-2/img_3.png b/data/2/action-2/img_3.png new file mode 100644 index 0000000000000000000000000000000000000000..148c844a0db1615a8f5c7336570abaaf04868bba Binary files /dev/null and b/data/2/action-2/img_3.png differ diff --git a/data/2/action-2/video_0.mp4 b/data/2/action-2/video_0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..8df1426183266c8ae629a423d2fda0fdab09cd26 Binary files /dev/null and b/data/2/action-2/video_0.mp4 differ diff --git a/data/2/action-2/video_1.mp4 b/data/2/action-2/video_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c479553d9c07c56f0ae3b25a34e578c4a2039857 Binary files /dev/null and b/data/2/action-2/video_1.mp4 differ diff --git a/data/2/action-2/video_2.mp4 b/data/2/action-2/video_2.mp4 new file mode 100644 index 
"""Helpers for drawing point, arrow and mask overlays on PIL images and for
updating nested state dictionaries."""
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw


def get_valid_mask(mask: np.ndarray):
    """Convert mask from gr.Image(0 to 255, RGBA) to binary mask.

    A 3-D array is first collapsed to a single 8-bit luminance channel.
    The result is divided by 255 only when its maximum is exactly 255;
    any other mask is returned unscaled.
    """
    if mask.ndim == 3:
        mask = np.array(Image.fromarray(mask).convert('L'))
    if mask.max() == 255:
        mask = mask / 255
    return mask


def draw_points_on_image(image, points, intr, extr, z, radius_scale=0.006):
    """Draw start markers and start-to-target arrows for every point pair.

    Args:
        image: PIL image to annotate. It is not modified.
        points: Mapping whose values are dicts with ``"start"`` and
            ``"target"`` entries, each either ``None`` or an indexable
            ``(x, y)`` position in pixels.
        intr: Unused; kept so existing call sites keep working.
        extr: Unused; kept so existing call sites keep working.
        z: Unused; kept so existing call sites keep working.
        radius_scale: Start-marker radius as a fraction of the image width
            (two extra pixels are always added).

    Returns:
        A new RGB image with the overlay composited over ``image``.

    Raises:
        AssertionError: If an entry has a target but no start.
    """
    overlay_rgba = Image.new("RGBA", image.size, 0)
    overlay_draw = ImageDraw.Draw(overlay_rgba)

    # Loop-invariant drawing parameters.
    t_color = (255, 100, 100)
    o_color = (255, 50, 50)
    rad_draw = int(image.size[0] * radius_scale) + 2

    for point in points.values():
        p_start = point["start"]
        p_target = point["target"]

        if p_start is None:
            assert p_target is None, "target point given without a start point"
            continue

        p_draw = int(p_start[0]), int(p_start[1])

        if p_target is not None:
            t_draw = int(p_target[0]), int(p_target[1])

            # Arrow shaft.
            overlay_draw.line(
                (p_draw[0], p_draw[1], t_draw[0], t_draw[1]),
                fill=o_color,
                width=4,
            )

            pt = (p_target[0] - p_start[0], p_target[1] - p_start[1])
            pt_norm = np.linalg.norm(pt)
            # A zero-length drag has no direction: normalising it would
            # divide by zero and int() would then fail on NaN, so the
            # arrowhead is simply omitted in that case.
            if pt_norm > 0:
                pt_unit = (pt[0] / pt_norm, pt[1] / pt_norm)
                pt_tang = (pt_unit[1], -pt_unit[0])
                # Each barb is 10% of the arrow length back along the shaft
                # and 10% to either side of it.
                head = 0.1 * pt_norm
                for side in (1, -1):
                    barb = (
                        t_draw[0] + side * pt_tang[0] * head - pt_unit[0] * head,
                        t_draw[1] + side * pt_tang[1] * head - pt_unit[1] * head,
                    )
                    overlay_draw.line(
                        (t_draw[0], t_draw[1], int(barb[0]), int(barb[1])),
                        fill=o_color,
                        width=4,
                    )

        # Start marker is drawn last so it sits on top of the shaft.
        overlay_draw.ellipse(
            (
                p_draw[0] - rad_draw,
                p_draw[1] - rad_draw,
                p_draw[0] + rad_draw,
                p_draw[1] + rad_draw,
            ),
            fill=t_color,
            outline=o_color,
            width=2,
        )

    return Image.alpha_composite(image.convert("RGBA"),
                                 overlay_rgba).convert("RGB")


def draw_raw_points_on_image(image, points, radius_scale=0.002):
    """Draw a small filled circle at every row of ``points``.

    Args:
        image: PIL image to annotate. It is not modified.
        points: Iterable of rows whose first two entries are the x and y
            pixel coordinates (e.g. an ``(N, 2)`` array).
        radius_scale: Circle radius as a fraction of the image width.

    Returns:
        A new RGB image with the circles composited over ``image``.
    """
    overlay_rgba = Image.new("RGBA", image.size, 0)
    overlay_draw = ImageDraw.Draw(overlay_rgba)

    t_color = (150, 150, 255)
    o_color = (50, 50, 255)
    rad_draw = int(image.size[0] * radius_scale)

    for point in points:
        t_draw = int(point[0]), int(point[1])
        overlay_draw.ellipse(
            (
                t_draw[0] - rad_draw,
                t_draw[1] - rad_draw,
                t_draw[0] + rad_draw,
                t_draw[1] + rad_draw,
            ),
            fill=t_color,
            outline=o_color,
        )

    return Image.alpha_composite(image.convert("RGBA"),
                                 overlay_rgba).convert("RGB")


def draw_mask_on_image(image, mask):
    """Blend ``mask`` over ``image`` as a faint grey-scale overlay.

    Args:
        image: PIL image to annotate. It is not modified.
        mask: 2-D array; it is multiplied by 255 and cast to ``uint8``
            (presumably values lie in [0, 1] - confirm against callers).

    Returns:
        A new RGB image with the mask composited at a fixed alpha of 45/255.
    """
    im_mask = np.uint8(mask * 255)
    grey_rgb = np.repeat(im_mask[..., None], 3, axis=-1)
    alpha = np.full(
        (im_mask.shape[0], im_mask.shape[1], 1), 45, dtype=np.uint8)
    im_mask_rgba = np.concatenate((grey_rgb, alpha), axis=-1)
    im_mask_rgba = Image.fromarray(im_mask_rgba).convert("RGBA")

    return Image.alpha_composite(image.convert("RGBA"),
                                 im_mask_rgba).convert("RGB")


def on_change_single_global_state(keys,
                                  value,
                                  global_state,
                                  map_transform=None):
    """Store ``value`` in ``global_state`` at the location named by ``keys``.

    Args:
        keys: A single string key, or a sequence of keys forming a path
            through nested containers; the last one is the key assigned to.
        value: The new value.
        global_state: Nested container that is updated in place.
        map_transform: Optional callable applied to ``value`` before it is
            stored.

    Returns:
        The same ``global_state`` object, after mutation.
    """
    if map_transform is not None:
        value = map_transform(value)

    # A bare string is a one-element path, not a sequence of characters.
    if isinstance(keys, str):
        keys = (keys,)

    curr_state = global_state
    for k in keys[:-1]:
        curr_state = curr_state[k]

    curr_state[keys[-1]] = value
    return global_state


def get_latest_points_pair(points_dict):
    """Return the largest key of ``points_dict``, or ``None`` if it is empty."""
    if not points_dict:
        return None
    return max(points_dict)