Spaces:

Chaerin5
/

FoundHand

Running on Zero

App Files Files Community

Chaerin5 committed on Mar 30

Commit

7685578

1 Parent(s): 6aa317f

instruction renovation; allow manual keypoints at edit hands

Browse files

Files changed (3) hide show

app.py +506 -224
bad_hands/14.jpg +3 -0
bad_hands/15.jpg +3 -0

app.py CHANGED Viewed

@@ -266,7 +266,7 @@ hands = mp_hands.Hands(
     min_detection_confidence=0.1,
 )
-def get_ref_anno(ref):
     if ref is None:
         return (
             None,
@@ -280,55 +280,73 @@ def get_ref_anno(ref):
     img = ref["composite"][..., :3]
     img = cv2.resize(img, opts.image_size, interpolation=cv2.INTER_AREA)
     keypts = np.zeros((42, 2))
-    if REF_POSE_MASK:
-        mp_pose = hands.process(img)
-        detected = np.array([0, 0])
-        start_idx = 0
-        if mp_pose.multi_hand_landmarks:
-            # handedness is flipped assuming the input image is mirrored in MediaPipe
-            for hand_landmarks, handedness in zip(
-                mp_pose.multi_hand_landmarks, mp_pose.multi_handedness
-            ):
-                # actually right hand
-                if handedness.classification[0].label == "Left":
-                    start_idx = 0
-                    detected[0] = 1
-                # actually left hand
-                elif handedness.classification[0].label == "Right":
-                    start_idx = 21
-                    detected[1] = 1
-                for i, landmark in enumerate(hand_landmarks.landmark):
-                    keypts[start_idx + i] = [
-                        landmark.x * opts.image_size[1],
-                        landmark.y * opts.image_size[0],
-                    ]
-            sam_predictor.set_image(img)
-            l = keypts[:21].shape[0]
-            if keypts[0].sum() != 0 and keypts[21].sum() != 0:
-                input_point = np.array([keypts[0], keypts[21]])
-                input_label = np.array([1, 1])
-            elif keypts[0].sum() != 0:
-                input_point = np.array(keypts[:1])
-                input_label = np.array([1])
-            elif keypts[21].sum() != 0:
-                input_point = np.array(keypts[21:22])
-                input_label = np.array([1])
-            masks, _, _ = sam_predictor.predict(
-                point_coords=input_point,
-                point_labels=input_label,
-                multimask_output=False,
-            )
-            hand_mask = masks[0]
-            masked_img = img * hand_mask[..., None] + 255 * (1 - hand_mask[..., None])
-            ref_pose = visualize_hand(keypts, masked_img)
         else:
-            raise gr.Error("No hands detected in the reference image.")
     else:
         hand_mask = np.zeros_like(img[:,:, 0])
         ref_pose = np.zeros_like(img)
-    print(f"keypts.max(): {keypts.max()}, keypts.min(): {keypts.min()}")
     def make_ref_cond(
         img,
         keypts,
@@ -344,7 +362,7 @@ def get_ref_anno(ref):
                 Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
             ]
         )
-        image = image_transform(img)
         kpts_valid = check_keypoints_validity(keypts, target_size)
         heatmaps = torch.tensor(
             keypoint_heatmap(
@@ -352,6 +370,7 @@ def get_ref_anno(ref):
             )
             * kpts_valid[:, None, None],
             dtype=torch.float,
         )[None, ...]
         mask = torch.tensor(
             cv2.resize(
@@ -360,6 +379,7 @@ def get_ref_anno(ref):
                 interpolation=cv2.INTER_NEAREST,
             ),
             dtype=torch.float,
         ).unsqueeze(0)[None, ...]
         return image[None, ...], heatmaps, mask
@@ -566,7 +586,7 @@ def sample_diff(ref_cond, target_cond, target_keypts, num_gen, seed, cfg):
     print(f"results[0].max(): {results[0].max()}")
     return results, results_pose
-# @spaces.GPU(duration=120)
 def ready_sample(img_ori, inpaint_mask, keypts):
     img = cv2.resize(img_ori[..., :3], opts.image_size, interpolation=cv2.INTER_AREA)
     sam_predictor.set_image(img)
@@ -588,21 +608,6 @@ def ready_sample(img_ori, inpaint_mask, keypts):
     keypts = np.concatenate(keypts, axis=0)
     keypts = scale_keypoint(keypts, (LENGTH, LENGTH), opts.image_size)
-    # if keypts[0].sum() != 0 and keypts[21].sum() != 0:
-    #     input_point = np.array([keypts[0], keypts[21]])
-    #     # input_point = keypts
-    #     input_label = np.array([1, 1])
-    #     # input_label = np.ones_like(input_point[:, 0])
-    # elif keypts[0].sum() != 0:
-    #     input_point = np.array(keypts[:1])
-    #     # input_point = keypts[:21]
-    #     input_label = np.array([1])
-    #     # input_label = np.ones_like(input_point[:21, 0])
-    # elif keypts[21].sum() != 0:
-    #     input_point = np.array(keypts[21:22])
-    #     # input_point = keypts[21:]
-    #     input_label = np.array([1])
-    #     # input_label = np.ones_like(input_point[21:, 0])
     box_shift_ratio = 0.5
     box_size_factor = 1.2
@@ -784,7 +789,8 @@ def sample_inpaint(
 def flip_hand(
-    img, pose_img, cond: Optional[torch.Tensor], keypts: Optional[torch.Tensor] = None
 ):
     if cond is None:  # clear clicked
         return None, None, None, None
@@ -800,7 +806,11 @@ def flip_hand(
         if keypts[21:, :].sum() != 0:
             keypts[21:, 0] = opts.image_size[1] - keypts[21:, 0]
             # keypts[21:, 1] = opts.image_size[0] - keypts[21:, 1]
-    return img, pose_img, cond, keypts
 def resize_to_full(img):
@@ -812,6 +822,9 @@ def resize_to_full(img):
 def clear_all():
     return (
         None,
         None,
         False,
@@ -828,6 +841,8 @@ def clear_all():
         1,
         42,
         3.0,
     )
@@ -878,7 +893,7 @@ def enable_component(image1, image2):
     return gr.update(interactive=True)
-def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left):
     if kpts is None:
         kpts = [[], []]
     if "Right hand" not in checkbox:
@@ -901,7 +916,7 @@ def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left):
         update_left = gr.update(visible=True)
         update_l_info = gr.update(visible=True)
-    return (
         kpts,
         vis_right,
         vis_left,
@@ -913,12 +928,69 @@ def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left):
         update_left,
         update_r_info,
         update_l_info,
     )
 LENGTH = 480
-example_imgs = [
     [
         "sample_images/sample1.jpg",
     ],
@@ -931,18 +1003,61 @@ example_imgs = [
     [
         "sample_images/sample4.jpg",
     ],
-    [
-        "sample_images/sample5.jpg",
-    ],
     [
         "sample_images/sample6.jpg",
     ],
     [
-        "sample_images/sample7.jpg",
-    ],
-    [
-        "sample_images/sample8.jpg",
     ],
     [
         "sample_images/sample9.jpg",
     ],
@@ -953,29 +1068,30 @@ example_imgs = [
         "sample_images/sample11.jpg",
     ],
     ["pose_images/pose1.jpg"],
-    ["pose_images/pose2.jpg"],
-    ["pose_images/pose3.jpg"],
-    ["pose_images/pose4.jpg"],
-    ["pose_images/pose5.jpg"],
-    ["pose_images/pose6.jpg"],
-    ["pose_images/pose7.jpg"],
-    ["pose_images/pose8.jpg"],
 ]
 fix_example_imgs = [
     ["bad_hands/1.jpg"],  # "bad_hands/1_mask.jpg"],
-    ["bad_hands/2.jpg"],  # "bad_hands/2_mask.jpg"],
     ["bad_hands/3.jpg"],  # "bad_hands/3_mask.jpg"],
-    ["bad_hands/4.jpg"],  # "bad_hands/4_mask.jpg"],
     ["bad_hands/5.jpg"],  # "bad_hands/5_mask.jpg"],
     ["bad_hands/6.jpg"],  # "bad_hands/6_mask.jpg"],
     ["bad_hands/7.jpg"],  # "bad_hands/7_mask.jpg"],
-    ["bad_hands/8.jpg"],  # "bad_hands/8_mask.jpg"],
-    ["bad_hands/9.jpg"],  # "bad_hands/9_mask.jpg"],
-    ["bad_hands/10.jpg"],  # "bad_hands/10_mask.jpg"],
-    ["bad_hands/11.jpg"],  # "bad_hands/11_mask.jpg"],
-    ["bad_hands/12.jpg"],  # "bad_hands/12_mask.jpg"],
-    ["bad_hands/13.jpg"],  # "bad_hands/13_mask.jpg"],
 ]
 custom_css = """
 .gradio-container .examples img {
@@ -985,11 +1101,26 @@ custom_css = """
 """
 _HEADER_ = '''
-<h1><b>FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation</b></h1>
-<h2>
-    📝<a href='https://arxiv.org/abs/2412.02690' target='_blank'>Paper</a>
-    📢<a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank'>Project</a>
-</h2>
 '''
 _CITE_ = r"""
@@ -1003,11 +1134,17 @@ _CITE_ = r"""
 ```
 """
-with gr.Blocks(css=custom_css) as demo:
     gr.Markdown(_HEADER_)
     with gr.Tab("Edit Hand Poses"):
         ref_img = gr.State(value=None)
         ref_cond = gr.State(value=None)
         keypts = gr.State(value=None)
         target_img = gr.State(value=None)
         target_cond = gr.State(value=None)
@@ -1016,9 +1153,11 @@ with gr.Blocks(css=custom_css) as demo:
         with gr.Row():
             with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 25px; font-weight: bold; ">1. Reference</p>"""
                 )
-                gr.Markdown("""<p style="text-align: center;"><br></p>""")
                 ref = gr.ImageEditor(
                     type="numpy",
                     label="Reference",
@@ -1029,21 +1168,114 @@ with gr.Blocks(css=custom_css) as demo:
                     layers=False,
                     crop_size="1:1",
                 )
                 ref_finish_crop = gr.Button(value="Finish Cropping", interactive=False)
-                ref_pose = gr.Image(
-                    type="numpy",
-                    label="Reference Pose",
-                    show_label=True,
-                    height=LENGTH,
-                    width=LENGTH,
-                    interactive=False,
                 )
                 ref_flip = gr.Checkbox(
                     value=False, label="Flip Handedness (Reference)", interactive=False
                 )
             with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 25px; font-weight: bold;">2. Target</p>"""
                 )
                 target = gr.ImageEditor(
                     type="numpy",
@@ -1055,6 +1287,10 @@ with gr.Blocks(css=custom_css) as demo:
                     layers=False,
                     crop_size="1:1",
                 )
                 target_finish_crop = gr.Button(
                     value="Finish Cropping", interactive=False
                 )
@@ -1066,19 +1302,19 @@ with gr.Blocks(css=custom_css) as demo:
                     width=LENGTH,
                     interactive=False,
                 )
                 target_flip = gr.Checkbox(
                     value=False, label="Flip Handedness (Target)", interactive=False
                 )
             with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 25px; font-weight: bold;">3. Result</p>"""
-                )
-                gr.Markdown(
-                    """<p style="text-align: center;">Run is enabled after the images have been processed</p>"""
                 )
                 run = gr.Button(value="Run", interactive=False)
                 gr.Markdown(
-                    """<p style="text-align: center;">~20s per generation with RTX3090. ~50s with A100. <br>(For example, if you set Number of generations as 2, it would take around 40s)</p>"""
                 )
                 results = gr.Gallery(
                     type="numpy",
@@ -1100,42 +1336,98 @@ with gr.Blocks(css=custom_css) as demo:
                     interactive=False,
                     preview=True,
                 )
                 clear = gr.ClearButton()
-        with gr.Row():
-            n_generation = gr.Slider(
-                label="Number of generations",
-                value=1,
-                minimum=1,
-                maximum=MAX_N,
-                step=1,
-                randomize=False,
-                interactive=True,
-            )
-            seed = gr.Slider(
-                label="Seed",
-                value=42,
-                minimum=0,
-                maximum=10000,
-                step=1,
-                randomize=False,
-                interactive=True,
-            )
-            cfg = gr.Slider(
-                label="Classifier free guidance scale",
-                value=2.5,
-                minimum=0.0,
-                maximum=10.0,
-                step=0.1,
-                randomize=False,
-                interactive=True,
-            )
         ref.change(enable_component, [ref, ref], ref_finish_crop)
-        ref_finish_crop.click(get_ref_anno, [ref], [ref_img, ref_pose, ref_cond])
         ref_pose.change(enable_component, [ref_img, ref_pose], ref_flip)
         ref_flip.select(
-            flip_hand, [ref, ref_pose, ref_cond], [ref, ref_pose, ref_cond, dump]
         )
         target.change(enable_component, [target, target], target_finish_crop)
         target_finish_crop.click(
@@ -1150,6 +1442,7 @@ with gr.Blocks(css=custom_css) as demo:
             [target, target_pose, target_cond, target_keypts],
         )
         ref_pose.change(enable_component, [ref_pose, target_pose], run)
         target_pose.change(enable_component, [ref_pose, target_pose], run)
         run.click(
             sample_diff,
@@ -1161,7 +1454,10 @@ with gr.Blocks(css=custom_css) as demo:
             [],
             [
                 ref,
                 ref_pose,
                 ref_flip,
                 target,
                 target_pose,
@@ -1170,23 +1466,35 @@ with gr.Blocks(css=custom_css) as demo:
                 results_pose,
                 ref_img,
                 ref_cond,
-                # mask,
                 target_img,
                 target_cond,
                 target_keypts,
                 n_generation,
                 seed,
                 cfg,
             ],
         )
-        gr.Markdown("""<p style="font-size: 25px; font-weight: bold;">Examples</p>""")
-        with gr.Tab("Reference"):
-            with gr.Row():
-                gr.Examples(example_imgs, [ref], examples_per_page=20)
-        with gr.Tab("Target"):
-            with gr.Row():
-                gr.Examples(example_imgs, [target], examples_per_page=20)
     with gr.Tab("Fix Hands"):
         fix_inpaint_mask = gr.State(value=None)
         fix_original = gr.State(value=None)
@@ -1197,19 +1505,13 @@ with gr.Blocks(css=custom_css) as demo:
         fix_target_cond = gr.State(value=None)
         fix_latent = gr.State(value=None)
         fix_inpaint_latent = gr.State(value=None)
-        # fix_size_memory = gr.State(value=(0, 0))
-        gr.Markdown("""<p style="text-align: center; font-size: 25px; font-weight: bold; ">⚠️ Note</p>""")
-        gr.Markdown("""<p>"Fix Hands" with A100 needs around 6 mins, which is beyond the ZeroGPU quota (5 mins). Please either purchase additional gpus from Hugging Face or wait for us to open-source our code soon so that you can use your own gpus🙏 </p>""")
         with gr.Row():
             with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 25px; font-weight: bold; ">1. Image Cropping & Brushing</p>"""
-                )
-                gr.Markdown(
-                    """<p style="text-align: center;">Crop the image around the hand.<br>Then, brush area (e.g., wrong finger) that needs to be fixed.</p>"""
                 )
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 20px; font-weight: bold; ">A. Crop</p>"""
                 )
                 fix_crop = gr.ImageEditor(
                     type="numpy",
@@ -1224,8 +1526,13 @@ with gr.Blocks(css=custom_css) as demo:
                     image_mode="RGBA",
                     container=False,
                 )
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 20px; font-weight: bold; ">B. Brush</p>"""
                 )
                 fix_ref = gr.ImageEditor(
                     type="numpy",
@@ -1246,32 +1553,21 @@ with gr.Blocks(css=custom_css) as demo:
                 fix_finish_crop = gr.Button(
                     value="Finish Croping & Brushing", interactive=False
                 )
-                gr.Markdown(
-                    """<p style="text-align: left; font-size: 20px; font-weight: bold; ">OpenPose keypoints convention</p>"""
-                )
-                fix_openpose = gr.Image(
-                    value="openpose.png",
-                    type="numpy",
-                    label="OpenPose keypoints convention",
-                    show_label=True,
-                    height=LENGTH // 3 * 2,
-                    width=LENGTH // 3 * 2,
-                    interactive=False,
-                )
             with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 25px; font-weight: bold; ">2. Keypoint Selection</p>"""
                 )
                 gr.Markdown(
-                    """<p style="text-align: center;">On the hand, select 21 keypoints that you hope the output to be. <br>Please see the \"OpenPose keypoints convention\" on the bottom left.</p>"""
                 )
                 fix_checkbox = gr.CheckboxGroup(
                     ["Right hand", "Left hand"],
-                    # value=["Right hand", "Left hand"],
-                    label="Hand side",
-                    info="Which side this hand is? Could be both.",
                     interactive=False,
                 )
                 fix_kp_r_info = gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
                     visible=False,
@@ -1314,22 +1610,24 @@ with gr.Blocks(css=custom_css) as demo:
                     fix_reset_left = gr.Button(
                         value="Reset", interactive=False, visible=False
                     )
-            with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 25px; font-weight: bold; ">3. Prepare Mask</p>"""
                 )
                 gr.Markdown(
-                    """<p style="text-align: center;">In Fix Hands, not segmentation mask, but only inpaint mask is used.</p>"""
                 )
                 fix_ready = gr.Button(value="Ready", interactive=False)
-                fix_mask_size = gr.Radio(
-                    ["256x256", "latent size (32x32)"],
-                    label="Visualized inpaint mask size",
-                    interactive=False,
-                    value="256x256",
-                )
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Visualized inpaint masks</p>"""
                 )
                 fix_vis_mask32 = gr.Image(
                     type="numpy",
@@ -1342,20 +1640,22 @@ with gr.Blocks(css=custom_css) as demo:
                 )
                 fix_vis_mask256 = gr.Image(
                     type="numpy",
-                    label=f"Visualized {opts.image_size} Inpaint Mask",
                     visible=True,
-                    show_label=True,
                     height=opts.image_size,
                     width=opts.image_size,
                     interactive=False,
                 )
             with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 25px; font-weight: bold; ">4. Results</p>"""
                 )
                 fix_run = gr.Button(value="Run", interactive=False)
                 gr.Markdown(
-                    """<p style="text-align: center;">>3min and ~24GB per generation</p>"""
                 )
                 fix_result = gr.Gallery(
                     type="numpy",
@@ -1377,9 +1677,16 @@ with gr.Blocks(css=custom_css) as demo:
                     interactive=False,
                     preview=True,
                 )
                 fix_clear = gr.ClearButton()
         gr.Markdown(
-            "[NOTE] Currently, Number of generation > 1 could lead to out-of-memory"
         )
         with gr.Row():
             fix_n_generation = gr.Slider(
@@ -1422,8 +1729,6 @@ with gr.Blocks(css=custom_css) as demo:
         fix_crop.change(resize_to_full, fix_crop, fix_ref)
         fix_ref.change(enable_component, [fix_ref, fix_ref], fix_finish_crop)
         fix_finish_crop.click(get_mask_inpaint, [fix_ref], [fix_inpaint_mask])
-        # fix_finish_crop.click(lambda x: x["background"], [fix_ref], [fix_kp_right])
-        # fix_finish_crop.click(lambda x: x["background"], [fix_ref], [fix_kp_left])
         fix_finish_crop.click(lambda x: x["background"], [fix_crop], [fix_original])
         fix_finish_crop.click(visualize_ref, [fix_crop, fix_ref], [fix_img])
         fix_img.change(lambda x: x, [fix_img], [fix_kp_right])
@@ -1452,9 +1757,6 @@ with gr.Blocks(css=custom_css) as demo:
         fix_inpaint_mask.change(
             enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_ready
         )
-        # fix_inpaint_mask.change(
-        #     enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_run
-        # )
         fix_checkbox.select(
             set_visible,
             [fix_checkbox, fix_kpts, fix_img, fix_kp_right, fix_kp_left],
@@ -1490,14 +1792,9 @@ with gr.Blocks(css=custom_css) as demo:
         fix_reset_left.click(
             reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
         )
-        # fix_kpts.change(check_keypoints, [fix_kpts], [fix_kp_right, fix_kp_left, fix_run])
-        # fix_run.click(lambda x:gr.update(value=None), [], [fix_result, fix_result_pose])
         fix_vis_mask32.change(
             enable_component, [fix_vis_mask32, fix_vis_mask256], fix_run
         )
-        fix_vis_mask32.change(
-            enable_component, [fix_vis_mask32, fix_vis_mask256], fix_mask_size
-        )
         fix_ready.click(
             ready_sample,
             [fix_original, fix_inpaint_mask, fix_kpts],
@@ -1511,9 +1808,6 @@ with gr.Blocks(css=custom_css) as demo:
                 fix_vis_mask256,
             ],
         )
-        fix_mask_size.select(
-            switch_mask_size, [fix_mask_size], [fix_vis_mask32, fix_vis_mask256]
-        )
         fix_run.click(
             sample_inpaint,
             [
@@ -1551,32 +1845,20 @@ with gr.Blocks(css=custom_css) as demo:
                 fix_latent,
                 fix_inpaint_latent,
                 fix_n_generation,
-                # fix_size_memory,
                 fix_seed,
                 fix_cfg,
                 fix_quality,
             ],
         )
-        gr.Markdown("""<p style="font-size: 25px; font-weight: bold;">Examples</p>""")
-        fix_dump_ex = gr.Image(value=None, label="Original Image", visible=False)
-        fix_dump_ex_masked = gr.Image(value=None, label="After Brushing", visible=False)
-        with gr.Column():
-            fix_example = gr.Examples(
-                fix_example_imgs,
-                # run_on_click=True,
-                # fn=parse_fix_example,
-                # inputs=[fix_dump_ex, fix_dump_ex_masked],
-                # outputs=[fix_original, fix_ref, fix_img, fix_inpaint_mask],
-                inputs=[fix_crop],
-                examples_per_page=20,
-            )
     gr.Markdown("<h1>Citation</h1>")
     gr.Markdown(_CITE_)
-# print("Ready to launch..")
-# _, _, shared_url = demo.queue().launch(
-#     share=True, server_name="0.0.0.0", server_port=7739
-# )
-demo.launch(share=True)

     min_detection_confidence=0.1,
 )
+def prepare_ref_anno(ref):
     if ref is None:
         return (
             None,
     img = ref["composite"][..., :3]
     img = cv2.resize(img, opts.image_size, interpolation=cv2.INTER_AREA)
     keypts = np.zeros((42, 2))
+    mp_pose = hands.process(img)
+    if mp_pose.multi_hand_landmarks:
+        # handedness is flipped assuming the input image is mirrored in MediaPipe
+        for hand_landmarks, handedness in zip(
+            mp_pose.multi_hand_landmarks, mp_pose.multi_handedness
+        ):
+            # actually right hand
+            if handedness.classification[0].label == "Left":
+                start_idx = 0
+            # actually left hand
+            elif handedness.classification[0].label == "Right":
+                start_idx = 21
+            for i, landmark in enumerate(hand_landmarks.landmark):
+                keypts[start_idx + i] = [
+                    landmark.x * opts.image_size[1],
+                    landmark.y * opts.image_size[0],
+                ]
+        print(f"keypts.max(): {keypts.max()}, keypts.min(): {keypts.min()}")
+        return img, keypts
+    else:
+        return img, None
+def get_ref_anno(img, keypts):
+    if keypts is None:
+        no_hands = cv2.resize(np.array(Image.open("no_hands.png"))[..., :3], (LENGTH, LENGTH))
+        return None, no_hands, None
+    if isinstance(keypts, list):
+        if len(keypts[0]) == 0:
+            keypts[0] = np.zeros((21, 2))
+        elif len(keypts[0]) == 21:
+            keypts[0] = np.array(keypts[0], dtype=np.float32)
         else:
+            gr.Info("Number of right hand keypoints should be either 0 or 21.")
+            return None, None
+        if len(keypts[1]) == 0:
+            keypts[1] = np.zeros((21, 2))
+        elif len(keypts[1]) == 21:
+            keypts[1] = np.array(keypts[1], dtype=np.float32)
+        else:
+            gr.Info("Number of left hand keypoints should be either 0 or 21.")
+            return None, None
+        keypts = np.concatenate(keypts, axis=0)
+    if REF_POSE_MASK:
+        sam_predictor.set_image(img)
+        if keypts[0].sum() != 0 and keypts[21].sum() != 0:
+            input_point = np.array([keypts[0], keypts[21]])
+            input_label = np.array([1, 1])
+        elif keypts[0].sum() != 0:
+            input_point = np.array(keypts[:1])
+            input_label = np.array([1])
+        elif keypts[21].sum() != 0:
+            input_point = np.array(keypts[21:22])
+            input_label = np.array([1])
+        masks, _, _ = sam_predictor.predict(
+            point_coords=input_point,
+            point_labels=input_label,
+            multimask_output=False,
+        )
+        hand_mask = masks[0]
+        masked_img = img * hand_mask[..., None] + 255 * (1 - hand_mask[..., None])
+        ref_pose = visualize_hand(keypts, masked_img)
     else:
         hand_mask = np.zeros_like(img[:,:, 0])
         ref_pose = np.zeros_like(img)
     def make_ref_cond(
         img,
         keypts,
                 Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
             ]
         )
+        image = image_transform(img) # .to(device)
         kpts_valid = check_keypoints_validity(keypts, target_size)
         heatmaps = torch.tensor(
             keypoint_heatmap(
             )
             * kpts_valid[:, None, None],
             dtype=torch.float,
+            # device=device
         )[None, ...]
         mask = torch.tensor(
             cv2.resize(
                 interpolation=cv2.INTER_NEAREST,
             ),
             dtype=torch.float,
+            # device=device,
         ).unsqueeze(0)[None, ...]
         return image[None, ...], heatmaps, mask
     print(f"results[0].max(): {results[0].max()}")
     return results, results_pose
+@spaces.GPU(duration=120)
 def ready_sample(img_ori, inpaint_mask, keypts):
     img = cv2.resize(img_ori[..., :3], opts.image_size, interpolation=cv2.INTER_AREA)
     sam_predictor.set_image(img)
     keypts = np.concatenate(keypts, axis=0)
     keypts = scale_keypoint(keypts, (LENGTH, LENGTH), opts.image_size)
     box_shift_ratio = 0.5
     box_size_factor = 1.2
 def flip_hand(
+    img, pose_img, cond: Optional[torch.Tensor], keypts: Optional[torch.Tensor] = None, pose_manual_img = None,
+    manual_kp_right=None, manual_kp_left=None
 ):
     if cond is None:  # clear clicked
         return None, None, None, None
         if keypts[21:, :].sum() != 0:
             keypts[21:, 0] = opts.image_size[1] - keypts[21:, 0]
             # keypts[21:, 1] = opts.image_size[0] - keypts[21:, 1]
+    if pose_manual_img is not None:
+        pose_manual_img = pose_manual_img[:, ::-1, :]
+        manual_kp_right = manual_kp_right[:, ::-1, :]
+        manual_kp_left = manual_kp_left[:, ::-1, :]
+    return img, pose_img, cond, keypts, pose_manual_img, manual_kp_right, manual_kp_left
 def resize_to_full(img):
 def clear_all():
     return (
+        None,
+        None,
+        None,
         None,
         None,
         False,
         1,
         42,
         3.0,
+        gr.update(interactive=False),
+        []
     )
     return gr.update(interactive=True)
+def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left, done=None, done_info=None):
     if kpts is None:
         kpts = [[], []]
     if "Right hand" not in checkbox:
         update_left = gr.update(visible=True)
         update_l_info = gr.update(visible=True)
+    ret = [
         kpts,
         vis_right,
         vis_left,
         update_left,
         update_r_info,
         update_l_info,
+    ]
+    if done is not None:
+        if not checkbox:
+            ret.append(gr.update(visible=False))
+            ret.append(gr.update(visible=False))
+        else:
+            ret.append(gr.update(visible=True))
+            ret.append(gr.update(visible=True))
+    return tuple(ret)
def set_unvisible():
    """Return a 12-tuple of ``gr.update(visible=False)`` values.

    Each entry is a separate update object, so one is available for every
    output slot of the handler this function is wired to.
    """
    hidden_slots = 12
    return tuple(gr.update(visible=False) for _ in range(hidden_slots))
def set_no_hands(decider, component):
    """Pass ``component`` through, or substitute the "no hands" placeholder.

    When ``decider`` is None, the image stored in ``no_hands.png`` is loaded,
    cut down to its first three channels and resized to ``(LENGTH, LENGTH)``;
    that array is returned instead of ``component``.
    """
    if decider is not None:
        return component
    placeholder = np.array(Image.open("no_hands.png"))
    placeholder_rgb = placeholder[..., :3]
    return cv2.resize(placeholder_rgb, (LENGTH, LENGTH))
+# def visible_component(decider, component):
+#     if decider is not None:
+#         update_component = gr.update(visible=True)
+#     else:
+#         update_component = gr.update(visible=False)
+#     return update_component
def unvisible_component(decider, component):
    """Return a visibility update that hides the target once ``decider`` is set.

    The update makes the target visible only while ``decider`` is None.
    ``component`` is accepted for signature compatibility but is not read.
    """
    show = decider is None
    return gr.update(visible=show)
def make_change(decider, state):
    """Flip ``state`` whenever ``decider`` is not None.

    The resulting value itself carries no meaning; only the fact that it
    differs from the previous one matters. When ``decider`` is None the
    incoming ``state`` is handed back untouched.
    """
    if decider is None:
        return state
    # Truthy -> False, falsy -> True: always a plain bool that differs in truthiness.
    return not state
 LENGTH = 480
+example_ref_imgs = [
     [
         "sample_images/sample1.jpg",
     ],
     [
         "sample_images/sample4.jpg",
     ],
+    # [
+    #     "sample_images/sample5.jpg",
+    # ],
     [
         "sample_images/sample6.jpg",
     ],
+    # [
+    #     "sample_images/sample7.jpg",
+    # ],
+    # [
+    #     "sample_images/sample8.jpg",
+    # ],
+    # [
+    #     "sample_images/sample9.jpg",
+    # ],
+    # [
+    #     "sample_images/sample10.jpg",
+    # ],
+    # [
+    #     "sample_images/sample11.jpg",
+    # ],
+    # ["pose_images/pose1.jpg"],
+    # ["pose_images/pose2.jpg"],
+    # ["pose_images/pose3.jpg"],
+    # ["pose_images/pose4.jpg"],
+    # ["pose_images/pose5.jpg"],
+    # ["pose_images/pose6.jpg"],
+    # ["pose_images/pose7.jpg"],
+    # ["pose_images/pose8.jpg"],
+]
+example_target_imgs = [
+    # [
+    #     "sample_images/sample1.jpg",
+    # ],
+    # [
+    #     "sample_images/sample2.jpg",
+    # ],
+    # [
+    #     "sample_images/sample3.jpg",
+    # ],
+    # [
+    #     "sample_images/sample4.jpg",
+    # ],
     [
+        "sample_images/sample5.jpg",
     ],
+    # [
+        # "sample_images/sample6.jpg",
+    # ],
+    # [
+    #     "sample_images/sample7.jpg",
+    # ],
+    # [
+    #     "sample_images/sample8.jpg",
+    # ],
     [
         "sample_images/sample9.jpg",
     ],
         "sample_images/sample11.jpg",
     ],
     ["pose_images/pose1.jpg"],
+    # ["pose_images/pose2.jpg"],
+    # ["pose_images/pose3.jpg"],
+    # ["pose_images/pose4.jpg"],
+    # ["pose_images/pose5.jpg"],
+    # ["pose_images/pose6.jpg"],
+    # ["pose_images/pose7.jpg"],
+    # ["pose_images/pose8.jpg"],
 ]
 fix_example_imgs = [
+    # Example rows (one image path each) passed to gr.Examples for the crop
+    # editor of the "Fix Hands" tab. Commented-out rows are not offered.
     ["bad_hands/1.jpg"],  # "bad_hands/1_mask.jpg"],
+    # ["bad_hands/2.jpg"],  # "bad_hands/2_mask.jpg"],
     ["bad_hands/3.jpg"],  # "bad_hands/3_mask.jpg"],
+    # ["bad_hands/4.jpg"],  # "bad_hands/4_mask.jpg"],
     ["bad_hands/5.jpg"],  # "bad_hands/5_mask.jpg"],
     ["bad_hands/6.jpg"],  # "bad_hands/6_mask.jpg"],
     ["bad_hands/7.jpg"],  # "bad_hands/7_mask.jpg"],
+    # ["bad_hands/8.jpg"],  # "bad_hands/8_mask.jpg"],
+    # ["bad_hands/9.jpg"],  # "bad_hands/9_mask.jpg"],
+    # ["bad_hands/10.jpg"],  # "bad_hands/10_mask.jpg"],
+    # ["bad_hands/11.jpg"],  # "bad_hands/11_mask.jpg"],
+    # ["bad_hands/12.jpg"],  # "bad_hands/12_mask.jpg"],
+    # ["bad_hands/13.jpg"],  # "bad_hands/13_mask.jpg"],
+    ["bad_hands/14.jpg"],
+    ["bad_hands/15.jpg"],
 ]
 custom_css = """
 .gradio-container .examples img {
 """
 _HEADER_ = '''
+<div style="text-align: center;">
+    <h1><b>FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation</b></h1>
+    <h2 style="color: #777777;">CVPR 2025</h2>
+    <style>
+        .link-spacing {
+            margin-right: 20px;
+        }
+    </style>
+    <p style="font-size: 15px;">
+        <span style="display: inline-block; margin-right: 30px;">Brown University</span>
+        <span style="display: inline-block;">Meta Reality Labs</span>
+    </p>
+    <h3>
+        <a href='https://arxiv.org/abs/2412.02690' target='_blank' class="link-spacing">Paper</a>
+        <a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank' class="link-spacing">Project Page</a>
+        <a href='' target='_blank' class="link-spacing">Code</a>
+        <a href='' target='_blank'>Model Weights</a>
+    </h3>
+    <p>Below are two important abilities of our model. First, we can <b>edit hand poses</b> given two hand images - one is the image to edit, and the other one provides target hand pose. Second, we can automatically <b>fix malformed hand images</b>, following the user-provided target hand pose and area to fix.</p>
+</div>
 '''
 _CITE_ = r"""
 ```
 """
+with gr.Blocks(css=custom_css, theme="soft") as demo:
     gr.Markdown(_HEADER_)
     with gr.Tab("Edit Hand Poses"):
         ref_img = gr.State(value=None)
+        ref_im_raw = gr.State(value=None)
+        ref_kp_raw = gr.State(value=0)
+        ref_kp_got = gr.State(value=None)
+        dump = gr.State(value=None)
         ref_cond = gr.State(value=None)
+        ref_manual_cond = gr.State(value=None)
+        ref_auto_cond = gr.State(value=None)
         keypts = gr.State(value=None)
         target_img = gr.State(value=None)
         target_cond = gr.State(value=None)
         with gr.Row():
             with gr.Column():
                 gr.Markdown(
+                    """<p style="text-align: center; font-size: 20px; font-weight: bold;">1. Upload a hand image to edit 📥</p>"""
+                )
+                gr.Markdown(
+                    """<p style="text-align: center;">&#9312; Optionally crop the image</p>"""
                 )
                 ref = gr.ImageEditor(
                     type="numpy",
                     label="Reference",
                     layers=False,
                     crop_size="1:1",
                 )
+                gr.Examples(example_ref_imgs, [ref], examples_per_page=20)
+                gr.Markdown(
+                    """<p style="text-align: center;">&#9313; Hit the &quot;Finish Cropping&quot; button to get hand pose</p>"""
+                )
                 ref_finish_crop = gr.Button(value="Finish Cropping", interactive=False)
+                with gr.Tab("Automatic hand keypoints"):
+                    ref_pose = gr.Image(
+                        type="numpy",
+                        label="Reference Pose",
+                        show_label=True,
+                        height=LENGTH,
+                        width=LENGTH,
+                        interactive=False,
+                    )
+                    ref_use_auto = gr.Button(value="Click here to use automatic, not manual", interactive=False, visible=True)
+                with gr.Tab("Manual hand keypoints"):
+                    ref_manual_checkbox_info = gr.Markdown(
+                        """<p style="text-align: center;"><b>Step 1.</b> Tell us if this is right, left, or both hands.</p>""",
+                        visible=True,
+                    )
+                    ref_manual_checkbox = gr.CheckboxGroup(
+                        ["Right hand", "Left hand"],
+                        show_label=False,
+                        visible=True,
+                        interactive=True,
+                    )
+                    ref_manual_kp_r_info = gr.Markdown(
+                        """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>right</b> hand. See \"OpenPose Keypoint Convention\" for guidance.</p>""",
+                        visible=False,
+                    )
+                    ref_manual_kp_right = gr.Image(
+                        type="numpy",
+                        label="Keypoint Selection (right hand)",
+                        show_label=True,
+                        height=LENGTH,
+                        width=LENGTH,
+                        interactive=False,
+                        visible=False,
+                        sources=[],
+                    )
+                    with gr.Row():
+                        ref_manual_undo_right = gr.Button(
+                            value="Undo", interactive=True, visible=False
+                        )
+                        ref_manual_reset_right = gr.Button(
+                            value="Reset", interactive=True, visible=False
+                        )
+                    ref_manual_kp_l_info = gr.Markdown(
+                        """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>left</b> hand. See \"OpenPose keypoint convention\" for guidance.</p>""",
+                        visible=False
+                    )
+                    ref_manual_kp_left = gr.Image(
+                        type="numpy",
+                        label="Keypoint Selection (left hand)",
+                        show_label=True,
+                        height=LENGTH,
+                        width=LENGTH,
+                        interactive=False,
+                        visible=False,
+                        sources=[],
+                    )
+                    with gr.Row():
+                        ref_manual_undo_left = gr.Button(
+                            value="Undo", interactive=True, visible=False
+                        )
+                        ref_manual_reset_left = gr.Button(
+                            value="Reset", interactive=True, visible=False
+                        )
+                    ref_manual_done_info = gr.Markdown(
+                        """<p style="text-align: center;"><b>Step 3.</b> Hit \"Done\" button to confirm.</p>""",
+                        visible=False,
+                    )
+                    ref_manual_done = gr.Button(value="Done", interactive=True, visible=False)
+                    ref_manual_pose = gr.Image(
+                        type="numpy",
+                        label="Reference Pose",
+                        show_label=True,
+                        height=LENGTH,
+                        width=LENGTH,
+                        interactive=False,
+                        visible=False
+                    )
+                    ref_use_manual = gr.Button(value="Click here to use manual, not automatic", interactive=True, visible=False)
+                    ref_manual_instruct = gr.Markdown(
+                        value="""<p style="text-align: left; font-weight: bold; ">OpenPose Keypoints Convention</p>""",
+                        visible=True
+                    )
+                    ref_manual_openpose = gr.Image(
+                        value="openpose.png",
+                        type="numpy",
+                        show_label=False,
+                        height=LENGTH // 2,
+                        width=LENGTH // 2,
+                        interactive=False,
+                        visible=True
+                    )
+                gr.Markdown(
+                    """<p style="text-align: center;">&#9314; Optionally flip the hand</p>"""
                 )
                 ref_flip = gr.Checkbox(
                     value=False, label="Flip Handedness (Reference)", interactive=False
                 )
             with gr.Column():
                 gr.Markdown(
+                    """<p style="text-align: center; font-size: 20px; font-weight: bold;">2. Upload a hand image for target hand pose 📥</p>"""
+                )
+                gr.Markdown(
+                    """<p style="text-align: center;">&#9312; Optionally crop the image</p>"""
                 )
                 target = gr.ImageEditor(
                     type="numpy",
                     layers=False,
                     crop_size="1:1",
                 )
+                gr.Examples(example_target_imgs, [target], examples_per_page=20)
+                gr.Markdown(
+                    """<p style="text-align: center;">&#9313; Hit the &quot;Finish Cropping&quot; button to get hand pose</p>"""
+                )
                 target_finish_crop = gr.Button(
                     value="Finish Cropping", interactive=False
                 )
                     width=LENGTH,
                     interactive=False,
                 )
+                gr.Markdown(
+                    """<p style="text-align: center;">&#9314; Optionally flip the hand</p>"""
+                )
                 target_flip = gr.Checkbox(
                     value=False, label="Flip Handedness (Target)", interactive=False
                 )
             with gr.Column():
                 gr.Markdown(
+                    """<p style="text-align: center; font-size: 20px; font-weight: bold;">3. Press &quot;Run&quot; to get the edited results 🎯</p>"""
                 )
                 run = gr.Button(value="Run", interactive=False)
                 gr.Markdown(
+                    """<p style="text-align: center;">⚠️ ~20s per generation with RTX3090. ~50s with A100. <br>(For example, if you set Number of generations as 2, it would take around 40s)</p>"""
                 )
                 results = gr.Gallery(
                     type="numpy",
                     interactive=False,
                     preview=True,
                 )
+                gr.Markdown(
+                    """<p style="text-align: center;">✨ Hit &quot;Clear&quot; to restart from the beginning</p>"""
+                )
                 clear = gr.ClearButton()
+        with gr.Tab("More options"):
+            with gr.Row():
+                n_generation = gr.Slider(
+                    label="Number of generations",
+                    value=1,
+                    minimum=1,
+                    maximum=MAX_N,
+                    step=1,
+                    randomize=False,
+                    interactive=True,
+                )
+                seed = gr.Slider(
+                    label="Seed",
+                    value=42,
+                    minimum=0,
+                    maximum=10000,
+                    step=1,
+                    randomize=False,
+                    interactive=True,
+                )
+                cfg = gr.Slider(
+                    label="Classifier free guidance scale",
+                    value=2.5,
+                    minimum=0.0,
+                    maximum=10.0,
+                    step=0.1,
+                    randomize=False,
+                    interactive=True,
+                )
         ref.change(enable_component, [ref, ref], ref_finish_crop)
+        ref_finish_crop.click(prepare_ref_anno, [ref], [ref_im_raw, ref_kp_raw])
+        ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_right)
+        ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_left)
+        ref_manual_checkbox.select(
+            set_visible,
+            [ref_manual_checkbox, ref_kp_got, ref_im_raw, ref_manual_kp_right, ref_manual_kp_left, ref_manual_done],
+            [
+                ref_kp_got,
+                ref_manual_kp_right,
+                ref_manual_kp_left,
+                ref_manual_kp_right,
+                ref_manual_undo_right,
+                ref_manual_reset_right,
+                ref_manual_kp_left,
+                ref_manual_undo_left,
+                ref_manual_reset_left,
+                ref_manual_kp_r_info,
+                ref_manual_kp_l_info,
+                ref_manual_done,
+                ref_manual_done_info
+            ]
+        )
+        ref_manual_kp_right.select(
+            get_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
+        )
+        ref_manual_undo_right.click(
+            undo_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
+        )
+        ref_manual_reset_right.click(
+            reset_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
+        )
+        ref_manual_kp_left.select(
+            get_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
+        )
+        ref_manual_undo_left.click(
+            undo_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
+        )
+        ref_manual_reset_left.click(
+            reset_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
+        )
+        ref_manual_done.click(get_ref_anno, [ref_im_raw, ref_kp_got], [ref_img, ref_manual_pose, ref_manual_cond])
+        ref_manual_cond.change(lambda x: x, ref_manual_cond, ref_cond)
+        ref_use_manual.click(lambda x: x, ref_manual_cond, ref_cond)
+        ref_use_manual.click(lambda x: gr.Info("Manual hand keypoints will be used for 'Reference'", duration=3))
+        ref_manual_done.click(lambda x: gr.update(visible=True), ref_manual_pose, ref_manual_pose)
+        ref_manual_done.click(lambda x: gr.update(visible=True), ref_use_manual, ref_use_manual)
+        ref_manual_pose.change(enable_component, [ref_manual_pose, ref_manual_pose], ref_manual_done)
+        ref_kp_raw.change(get_ref_anno, [ref_im_raw, ref_kp_raw], [ref_img, ref_pose, ref_auto_cond])
+        ref_auto_cond.change(lambda x: x, ref_auto_cond, ref_cond)
+        ref_use_auto.click(lambda x: x, ref_auto_cond, ref_cond)
+        ref_use_auto.click(lambda x: gr.Info("Automatic hand keypoints will be used for 'Reference'", duration=3))
+        ref_pose.change(enable_component, [ref_kp_raw, ref_pose], ref_use_auto)
         ref_pose.change(enable_component, [ref_img, ref_pose], ref_flip)
+        ref_manual_pose.change(enable_component, [ref_img, ref_manual_pose], ref_flip)
         ref_flip.select(
+            flip_hand, [ref, ref_pose, ref_cond, gr.State(value=None), ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left], [ref, ref_pose, ref_cond, dump, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left]
         )
         target.change(enable_component, [target, target], target_finish_crop)
         target_finish_crop.click(
             [target, target_pose, target_cond, target_keypts],
         )
         ref_pose.change(enable_component, [ref_pose, target_pose], run)
+        ref_manual_pose.change(enable_component, [ref_manual_pose, target_pose], run)
         target_pose.change(enable_component, [ref_pose, target_pose], run)
         run.click(
             sample_diff,
             [],
             [
                 ref,
+                ref_manual_kp_right,
+                ref_manual_kp_left,
                 ref_pose,
+                ref_manual_pose,
                 ref_flip,
                 target,
                 target_pose,
                 results_pose,
                 ref_img,
                 ref_cond,
                 target_img,
                 target_cond,
                 target_keypts,
                 n_generation,
                 seed,
                 cfg,
+                ref_kp_raw,
+                ref_manual_checkbox
             ],
         )
+        clear.click(
+            set_unvisible,
+            [],
+            [
+                ref_manual_kp_r_info,
+                ref_manual_kp_l_info,
+                ref_manual_undo_left,
+                ref_manual_undo_right,
+                ref_manual_reset_left,
+                ref_manual_reset_right,
+                ref_manual_done,
+                ref_manual_done_info,
+                ref_manual_pose,
+                ref_use_manual,
+                ref_manual_kp_right,
+                ref_manual_kp_left
+            ]
+        )
     with gr.Tab("Fix Hands"):
         fix_inpaint_mask = gr.State(value=None)
         fix_original = gr.State(value=None)
         fix_target_cond = gr.State(value=None)
         fix_latent = gr.State(value=None)
         fix_inpaint_latent = gr.State(value=None)
         with gr.Row():
             with gr.Column():
                 gr.Markdown(
+                    """<p style="text-align: center; font-size: 20px; font-weight: bold;">1. Upload a malformed hand image to fix 📥</p>"""
                 )
                 gr.Markdown(
+                    """<p style="text-align: center;">&#9312; Optionally crop the image around the hand</p>"""
                 )
                 fix_crop = gr.ImageEditor(
                     type="numpy",
                     image_mode="RGBA",
                     container=False,
                 )
+                fix_example = gr.Examples(
+                    fix_example_imgs,
+                    inputs=[fix_crop],
+                    examples_per_page=20,
+                )
                 gr.Markdown(
+                    """<p style="text-align: center;">&#9313; Brush area (e.g., wrong finger) that needs to be fixed. This will serve as an inpaint mask</p>"""
                 )
                 fix_ref = gr.ImageEditor(
                     type="numpy",
                 fix_finish_crop = gr.Button(
                     value="Finish Croping & Brushing", interactive=False
                 )
             with gr.Column():
                 gr.Markdown(
+                    """<p style="text-align: center; font-size: 20px; font-weight: bold;">2. Click on hand to get target hand pose</p>"""
                 )
                 gr.Markdown(
+                    """<p style="text-align: center;">&#9312; Tell us if this is right, left, or both hands</p>"""
                 )
                 fix_checkbox = gr.CheckboxGroup(
                     ["Right hand", "Left hand"],
+                    show_label=False,
                     interactive=False,
                 )
+                gr.Markdown(
+                    """<p style="text-align: center;">&#9313; On the image, click 21 hand keypoints. This will serve as target hand poses. See the \"OpenPose keypoints convention\" for guidance.</p>"""
+                )
                 fix_kp_r_info = gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
                     visible=False,
                     fix_reset_left = gr.Button(
                         value="Reset", interactive=False, visible=False
                     )
                 gr.Markdown(
+                    """<p style="text-align: left; font-weight: bold; ">OpenPose keypoints convention</p>"""
+                )
+                fix_openpose = gr.Image(
+                    value="openpose.png",
+                    type="numpy",
+                    show_label=False,
+                    height=LENGTH // 2,
+                    width=LENGTH // 2,
+                    interactive=False,
                 )
+            with gr.Column():
                 gr.Markdown(
+                    """<p style="text-align: center; font-size: 20px; font-weight: bold;">3. Press &quot;Ready&quot; to start pre-processing</p>"""
                 )
                 fix_ready = gr.Button(value="Ready", interactive=False)
                 gr.Markdown(
+                    """<p style="text-align: center; font-weight: bold; ">Visualized (256, 256) Inpaint Mask</p>"""
                 )
                 fix_vis_mask32 = gr.Image(
                     type="numpy",
                 )
                 fix_vis_mask256 = gr.Image(
                     type="numpy",
                     visible=True,
+                    show_label=False,
                     height=opts.image_size,
                     width=opts.image_size,
                     interactive=False,
                 )
+                gr.Markdown(
+                    """<p style="text-align: center;">[NOTE] Above should be inpaint mask that you brushed, NOT the segmentation mask of the entire hand. </p>"""
+                )
             with gr.Column():
                 gr.Markdown(
+                    """<p style="text-align: center; font-size: 20px; font-weight: bold;">4. Press &quot;Run&quot; to get the fixed hand image 🎯</p>"""
                 )
                 fix_run = gr.Button(value="Run", interactive=False)
                 gr.Markdown(
+                    """<p style="text-align: center;">⚠️  >3min and ~24GB per generation</p>"""
                 )
                 fix_result = gr.Gallery(
                     type="numpy",
                     interactive=False,
                     preview=True,
                 )
+                gr.Markdown(
+                    """<p style="text-align: center;">✨ Hit &quot;Clear&quot; to restart from the beginning</p>"""
+                )
                 fix_clear = gr.ClearButton()
+        gr.Markdown(
+            """<p style="text-align: left; font-size: 25px;"><b>More options</b></p>"""
+        )
         gr.Markdown(
+            "⚠️ Currently, Number of generation > 1 could lead to out-of-memory"
         )
         with gr.Row():
             fix_n_generation = gr.Slider(
         fix_crop.change(resize_to_full, fix_crop, fix_ref)
         fix_ref.change(enable_component, [fix_ref, fix_ref], fix_finish_crop)
         fix_finish_crop.click(get_mask_inpaint, [fix_ref], [fix_inpaint_mask])
         fix_finish_crop.click(lambda x: x["background"], [fix_crop], [fix_original])
         fix_finish_crop.click(visualize_ref, [fix_crop, fix_ref], [fix_img])
         fix_img.change(lambda x: x, [fix_img], [fix_kp_right])
         fix_inpaint_mask.change(
             enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_ready
         )
         fix_checkbox.select(
             set_visible,
             [fix_checkbox, fix_kpts, fix_img, fix_kp_right, fix_kp_left],
         fix_reset_left.click(
             reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
         )
         fix_vis_mask32.change(
             enable_component, [fix_vis_mask32, fix_vis_mask256], fix_run
         )
         fix_ready.click(
             ready_sample,
             [fix_original, fix_inpaint_mask, fix_kpts],
                 fix_vis_mask256,
             ],
         )
         fix_run.click(
             sample_inpaint,
             [
                 fix_latent,
                 fix_inpaint_latent,
                 fix_n_generation,
                 fix_seed,
                 fix_cfg,
                 fix_quality,
             ],
         )
     gr.Markdown("<h1>Citation</h1>")
+    gr.Markdown(
+        """<p style="text-align: left;">If this was useful, please cite us! ❤️</p>"""
+    )
     gr.Markdown(_CITE_)
+# Enable request queuing and start the server bound to 0.0.0.0 on port 7739
+# with share=True; launch() is unpacked into three values and only the last
+# one is kept (as shared_url).
+print("Ready to launch..")
+_, _, shared_url = demo.queue().launch(
+    share=True, server_name="0.0.0.0", server_port=7739
+)
+# demo.launch(share=True)

bad_hands/14.jpg ADDED Viewed

Git LFS Details

SHA256: f9dcd7eaf94c6f0d8ed1ed0f1c8cb500ad91ccd47a766c2363bbc845d6ae61d2
Pointer size: 131 Bytes
Size of remote file: 190 kB

bad_hands/15.jpg ADDED Viewed

Git LFS Details

SHA256: 92dfa5ee3db99ab7c9bbd7fe88d254ddb988b438d6092210d3bd31971fa29238
Pointer size: 130 Bytes
Size of remote file: 44.4 kB