Chaerin5 commited on
Commit
7685578
·
1 Parent(s): 6aa317f

instruction renovation; allow manual keypoints at edit hands

Browse files
Files changed (3) hide show
  1. app.py +506 -224
  2. bad_hands/14.jpg +3 -0
  3. bad_hands/15.jpg +3 -0
app.py CHANGED
@@ -266,7 +266,7 @@ hands = mp_hands.Hands(
266
  min_detection_confidence=0.1,
267
  )
268
 
269
- def get_ref_anno(ref):
270
  if ref is None:
271
  return (
272
  None,
@@ -280,55 +280,73 @@ def get_ref_anno(ref):
280
  img = ref["composite"][..., :3]
281
  img = cv2.resize(img, opts.image_size, interpolation=cv2.INTER_AREA)
282
  keypts = np.zeros((42, 2))
283
- if REF_POSE_MASK:
284
- mp_pose = hands.process(img)
285
- detected = np.array([0, 0])
286
- start_idx = 0
287
- if mp_pose.multi_hand_landmarks:
288
- # handedness is flipped assuming the input image is mirrored in MediaPipe
289
- for hand_landmarks, handedness in zip(
290
- mp_pose.multi_hand_landmarks, mp_pose.multi_handedness
291
- ):
292
- # actually right hand
293
- if handedness.classification[0].label == "Left":
294
- start_idx = 0
295
- detected[0] = 1
296
- # actually left hand
297
- elif handedness.classification[0].label == "Right":
298
- start_idx = 21
299
- detected[1] = 1
300
- for i, landmark in enumerate(hand_landmarks.landmark):
301
- keypts[start_idx + i] = [
302
- landmark.x * opts.image_size[1],
303
- landmark.y * opts.image_size[0],
304
- ]
305
-
306
- sam_predictor.set_image(img)
307
- l = keypts[:21].shape[0]
308
- if keypts[0].sum() != 0 and keypts[21].sum() != 0:
309
- input_point = np.array([keypts[0], keypts[21]])
310
- input_label = np.array([1, 1])
311
- elif keypts[0].sum() != 0:
312
- input_point = np.array(keypts[:1])
313
- input_label = np.array([1])
314
- elif keypts[21].sum() != 0:
315
- input_point = np.array(keypts[21:22])
316
- input_label = np.array([1])
317
- masks, _, _ = sam_predictor.predict(
318
- point_coords=input_point,
319
- point_labels=input_label,
320
- multimask_output=False,
321
- )
322
- hand_mask = masks[0]
323
- masked_img = img * hand_mask[..., None] + 255 * (1 - hand_mask[..., None])
324
- ref_pose = visualize_hand(keypts, masked_img)
325
  else:
326
- raise gr.Error("No hands detected in the reference image.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  else:
328
  hand_mask = np.zeros_like(img[:,:, 0])
329
  ref_pose = np.zeros_like(img)
330
- print(f"keypts.max(): {keypts.max()}, keypts.min(): {keypts.min()}")
331
-
332
  def make_ref_cond(
333
  img,
334
  keypts,
@@ -344,7 +362,7 @@ def get_ref_anno(ref):
344
  Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
345
  ]
346
  )
347
- image = image_transform(img)
348
  kpts_valid = check_keypoints_validity(keypts, target_size)
349
  heatmaps = torch.tensor(
350
  keypoint_heatmap(
@@ -352,6 +370,7 @@ def get_ref_anno(ref):
352
  )
353
  * kpts_valid[:, None, None],
354
  dtype=torch.float,
 
355
  )[None, ...]
356
  mask = torch.tensor(
357
  cv2.resize(
@@ -360,6 +379,7 @@ def get_ref_anno(ref):
360
  interpolation=cv2.INTER_NEAREST,
361
  ),
362
  dtype=torch.float,
 
363
  ).unsqueeze(0)[None, ...]
364
  return image[None, ...], heatmaps, mask
365
 
@@ -566,7 +586,7 @@ def sample_diff(ref_cond, target_cond, target_keypts, num_gen, seed, cfg):
566
  print(f"results[0].max(): {results[0].max()}")
567
  return results, results_pose
568
 
569
- # @spaces.GPU(duration=120)
570
  def ready_sample(img_ori, inpaint_mask, keypts):
571
  img = cv2.resize(img_ori[..., :3], opts.image_size, interpolation=cv2.INTER_AREA)
572
  sam_predictor.set_image(img)
@@ -588,21 +608,6 @@ def ready_sample(img_ori, inpaint_mask, keypts):
588
 
589
  keypts = np.concatenate(keypts, axis=0)
590
  keypts = scale_keypoint(keypts, (LENGTH, LENGTH), opts.image_size)
591
- # if keypts[0].sum() != 0 and keypts[21].sum() != 0:
592
- # input_point = np.array([keypts[0], keypts[21]])
593
- # # input_point = keypts
594
- # input_label = np.array([1, 1])
595
- # # input_label = np.ones_like(input_point[:, 0])
596
- # elif keypts[0].sum() != 0:
597
- # input_point = np.array(keypts[:1])
598
- # # input_point = keypts[:21]
599
- # input_label = np.array([1])
600
- # # input_label = np.ones_like(input_point[:21, 0])
601
- # elif keypts[21].sum() != 0:
602
- # input_point = np.array(keypts[21:22])
603
- # # input_point = keypts[21:]
604
- # input_label = np.array([1])
605
- # # input_label = np.ones_like(input_point[21:, 0])
606
 
607
  box_shift_ratio = 0.5
608
  box_size_factor = 1.2
@@ -784,7 +789,8 @@ def sample_inpaint(
784
 
785
 
786
  def flip_hand(
787
- img, pose_img, cond: Optional[torch.Tensor], keypts: Optional[torch.Tensor] = None
 
788
  ):
789
  if cond is None: # clear clicked
790
  return None, None, None, None
@@ -800,7 +806,11 @@ def flip_hand(
800
  if keypts[21:, :].sum() != 0:
801
  keypts[21:, 0] = opts.image_size[1] - keypts[21:, 0]
802
  # keypts[21:, 1] = opts.image_size[0] - keypts[21:, 1]
803
- return img, pose_img, cond, keypts
 
 
 
 
804
 
805
 
806
  def resize_to_full(img):
@@ -812,6 +822,9 @@ def resize_to_full(img):
812
 
813
  def clear_all():
814
  return (
 
 
 
815
  None,
816
  None,
817
  False,
@@ -828,6 +841,8 @@ def clear_all():
828
  1,
829
  42,
830
  3.0,
 
 
831
  )
832
 
833
 
@@ -878,7 +893,7 @@ def enable_component(image1, image2):
878
  return gr.update(interactive=True)
879
 
880
 
881
- def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left):
882
  if kpts is None:
883
  kpts = [[], []]
884
  if "Right hand" not in checkbox:
@@ -901,7 +916,7 @@ def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left):
901
  update_left = gr.update(visible=True)
902
  update_l_info = gr.update(visible=True)
903
 
904
- return (
905
  kpts,
906
  vis_right,
907
  vis_left,
@@ -913,12 +928,69 @@ def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left):
913
  update_left,
914
  update_r_info,
915
  update_l_info,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
916
  )
917
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
918
 
919
  LENGTH = 480
920
 
921
- example_imgs = [
922
  [
923
  "sample_images/sample1.jpg",
924
  ],
@@ -931,18 +1003,61 @@ example_imgs = [
931
  [
932
  "sample_images/sample4.jpg",
933
  ],
934
- [
935
- "sample_images/sample5.jpg",
936
- ],
937
  [
938
  "sample_images/sample6.jpg",
939
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940
  [
941
- "sample_images/sample7.jpg",
942
- ],
943
- [
944
- "sample_images/sample8.jpg",
945
  ],
 
 
 
 
 
 
 
 
 
946
  [
947
  "sample_images/sample9.jpg",
948
  ],
@@ -953,29 +1068,30 @@ example_imgs = [
953
  "sample_images/sample11.jpg",
954
  ],
955
  ["pose_images/pose1.jpg"],
956
- ["pose_images/pose2.jpg"],
957
- ["pose_images/pose3.jpg"],
958
- ["pose_images/pose4.jpg"],
959
- ["pose_images/pose5.jpg"],
960
- ["pose_images/pose6.jpg"],
961
- ["pose_images/pose7.jpg"],
962
- ["pose_images/pose8.jpg"],
963
  ]
964
-
965
  fix_example_imgs = [
966
  ["bad_hands/1.jpg"], # "bad_hands/1_mask.jpg"],
967
- ["bad_hands/2.jpg"], # "bad_hands/2_mask.jpg"],
968
  ["bad_hands/3.jpg"], # "bad_hands/3_mask.jpg"],
969
- ["bad_hands/4.jpg"], # "bad_hands/4_mask.jpg"],
970
  ["bad_hands/5.jpg"], # "bad_hands/5_mask.jpg"],
971
  ["bad_hands/6.jpg"], # "bad_hands/6_mask.jpg"],
972
  ["bad_hands/7.jpg"], # "bad_hands/7_mask.jpg"],
973
- ["bad_hands/8.jpg"], # "bad_hands/8_mask.jpg"],
974
- ["bad_hands/9.jpg"], # "bad_hands/9_mask.jpg"],
975
- ["bad_hands/10.jpg"], # "bad_hands/10_mask.jpg"],
976
- ["bad_hands/11.jpg"], # "bad_hands/11_mask.jpg"],
977
- ["bad_hands/12.jpg"], # "bad_hands/12_mask.jpg"],
978
- ["bad_hands/13.jpg"], # "bad_hands/13_mask.jpg"],
 
 
979
  ]
980
  custom_css = """
981
  .gradio-container .examples img {
@@ -985,11 +1101,26 @@ custom_css = """
985
  """
986
 
987
  _HEADER_ = '''
988
- <h1><b>FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation</b></h1>
989
- <h2>
990
- 📝<a href='https://arxiv.org/abs/2412.02690' target='_blank'>Paper</a>
991
- 📢<a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank'>Project</a>
992
- </h2>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
993
  '''
994
 
995
  _CITE_ = r"""
@@ -1003,11 +1134,17 @@ _CITE_ = r"""
1003
  ```
1004
  """
1005
 
1006
- with gr.Blocks(css=custom_css) as demo:
1007
  gr.Markdown(_HEADER_)
1008
  with gr.Tab("Edit Hand Poses"):
1009
  ref_img = gr.State(value=None)
 
 
 
 
1010
  ref_cond = gr.State(value=None)
 
 
1011
  keypts = gr.State(value=None)
1012
  target_img = gr.State(value=None)
1013
  target_cond = gr.State(value=None)
@@ -1016,9 +1153,11 @@ with gr.Blocks(css=custom_css) as demo:
1016
  with gr.Row():
1017
  with gr.Column():
1018
  gr.Markdown(
1019
- """<p style="text-align: center; font-size: 25px; font-weight: bold; ">1. Reference</p>"""
 
 
 
1020
  )
1021
- gr.Markdown("""<p style="text-align: center;"><br></p>""")
1022
  ref = gr.ImageEditor(
1023
  type="numpy",
1024
  label="Reference",
@@ -1029,21 +1168,114 @@ with gr.Blocks(css=custom_css) as demo:
1029
  layers=False,
1030
  crop_size="1:1",
1031
  )
 
 
 
 
1032
  ref_finish_crop = gr.Button(value="Finish Cropping", interactive=False)
1033
- ref_pose = gr.Image(
1034
- type="numpy",
1035
- label="Reference Pose",
1036
- show_label=True,
1037
- height=LENGTH,
1038
- width=LENGTH,
1039
- interactive=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1040
  )
1041
  ref_flip = gr.Checkbox(
1042
  value=False, label="Flip Handedness (Reference)", interactive=False
1043
  )
1044
  with gr.Column():
1045
  gr.Markdown(
1046
- """<p style="text-align: center; font-size: 25px; font-weight: bold;">2. Target</p>"""
 
 
 
1047
  )
1048
  target = gr.ImageEditor(
1049
  type="numpy",
@@ -1055,6 +1287,10 @@ with gr.Blocks(css=custom_css) as demo:
1055
  layers=False,
1056
  crop_size="1:1",
1057
  )
 
 
 
 
1058
  target_finish_crop = gr.Button(
1059
  value="Finish Cropping", interactive=False
1060
  )
@@ -1066,19 +1302,19 @@ with gr.Blocks(css=custom_css) as demo:
1066
  width=LENGTH,
1067
  interactive=False,
1068
  )
 
 
 
1069
  target_flip = gr.Checkbox(
1070
  value=False, label="Flip Handedness (Target)", interactive=False
1071
  )
1072
  with gr.Column():
1073
  gr.Markdown(
1074
- """<p style="text-align: center; font-size: 25px; font-weight: bold;">3. Result</p>"""
1075
- )
1076
- gr.Markdown(
1077
- """<p style="text-align: center;">Run is enabled after the images have been processed</p>"""
1078
  )
1079
  run = gr.Button(value="Run", interactive=False)
1080
  gr.Markdown(
1081
- """<p style="text-align: center;">~20s per generation with RTX3090. ~50s with A100. <br>(For example, if you set Number of generations as 2, it would take around 40s)</p>"""
1082
  )
1083
  results = gr.Gallery(
1084
  type="numpy",
@@ -1100,42 +1336,98 @@ with gr.Blocks(css=custom_css) as demo:
1100
  interactive=False,
1101
  preview=True,
1102
  )
 
 
 
1103
  clear = gr.ClearButton()
1104
 
1105
- with gr.Row():
1106
- n_generation = gr.Slider(
1107
- label="Number of generations",
1108
- value=1,
1109
- minimum=1,
1110
- maximum=MAX_N,
1111
- step=1,
1112
- randomize=False,
1113
- interactive=True,
1114
- )
1115
- seed = gr.Slider(
1116
- label="Seed",
1117
- value=42,
1118
- minimum=0,
1119
- maximum=10000,
1120
- step=1,
1121
- randomize=False,
1122
- interactive=True,
1123
- )
1124
- cfg = gr.Slider(
1125
- label="Classifier free guidance scale",
1126
- value=2.5,
1127
- minimum=0.0,
1128
- maximum=10.0,
1129
- step=0.1,
1130
- randomize=False,
1131
- interactive=True,
1132
- )
 
1133
 
1134
  ref.change(enable_component, [ref, ref], ref_finish_crop)
1135
- ref_finish_crop.click(get_ref_anno, [ref], [ref_img, ref_pose, ref_cond])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1136
  ref_pose.change(enable_component, [ref_img, ref_pose], ref_flip)
 
1137
  ref_flip.select(
1138
- flip_hand, [ref, ref_pose, ref_cond], [ref, ref_pose, ref_cond, dump]
1139
  )
1140
  target.change(enable_component, [target, target], target_finish_crop)
1141
  target_finish_crop.click(
@@ -1150,6 +1442,7 @@ with gr.Blocks(css=custom_css) as demo:
1150
  [target, target_pose, target_cond, target_keypts],
1151
  )
1152
  ref_pose.change(enable_component, [ref_pose, target_pose], run)
 
1153
  target_pose.change(enable_component, [ref_pose, target_pose], run)
1154
  run.click(
1155
  sample_diff,
@@ -1161,7 +1454,10 @@ with gr.Blocks(css=custom_css) as demo:
1161
  [],
1162
  [
1163
  ref,
 
 
1164
  ref_pose,
 
1165
  ref_flip,
1166
  target,
1167
  target_pose,
@@ -1170,23 +1466,35 @@ with gr.Blocks(css=custom_css) as demo:
1170
  results_pose,
1171
  ref_img,
1172
  ref_cond,
1173
- # mask,
1174
  target_img,
1175
  target_cond,
1176
  target_keypts,
1177
  n_generation,
1178
  seed,
1179
  cfg,
 
 
1180
  ],
1181
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1182
 
1183
- gr.Markdown("""<p style="font-size: 25px; font-weight: bold;">Examples</p>""")
1184
- with gr.Tab("Reference"):
1185
- with gr.Row():
1186
- gr.Examples(example_imgs, [ref], examples_per_page=20)
1187
- with gr.Tab("Target"):
1188
- with gr.Row():
1189
- gr.Examples(example_imgs, [target], examples_per_page=20)
1190
  with gr.Tab("Fix Hands"):
1191
  fix_inpaint_mask = gr.State(value=None)
1192
  fix_original = gr.State(value=None)
@@ -1197,19 +1505,13 @@ with gr.Blocks(css=custom_css) as demo:
1197
  fix_target_cond = gr.State(value=None)
1198
  fix_latent = gr.State(value=None)
1199
  fix_inpaint_latent = gr.State(value=None)
1200
- # fix_size_memory = gr.State(value=(0, 0))
1201
- gr.Markdown("""<p style="text-align: center; font-size: 25px; font-weight: bold; ">⚠️ Note</p>""")
1202
- gr.Markdown("""<p>"Fix Hands" with A100 needs around 6 mins, which is beyond the ZeroGPU quota (5 mins). Please either purchase additional gpus from Hugging Face or wait for us to open-source our code soon so that you can use your own gpus🙏 </p>""")
1203
  with gr.Row():
1204
  with gr.Column():
1205
  gr.Markdown(
1206
- """<p style="text-align: center; font-size: 25px; font-weight: bold; ">1. Image Cropping & Brushing</p>"""
1207
- )
1208
- gr.Markdown(
1209
- """<p style="text-align: center;">Crop the image around the hand.<br>Then, brush area (e.g., wrong finger) that needs to be fixed.</p>"""
1210
  )
1211
  gr.Markdown(
1212
- """<p style="text-align: center; font-size: 20px; font-weight: bold; ">A. Crop</p>"""
1213
  )
1214
  fix_crop = gr.ImageEditor(
1215
  type="numpy",
@@ -1224,8 +1526,13 @@ with gr.Blocks(css=custom_css) as demo:
1224
  image_mode="RGBA",
1225
  container=False,
1226
  )
 
 
 
 
 
1227
  gr.Markdown(
1228
- """<p style="text-align: center; font-size: 20px; font-weight: bold; ">B. Brush</p>"""
1229
  )
1230
  fix_ref = gr.ImageEditor(
1231
  type="numpy",
@@ -1246,32 +1553,21 @@ with gr.Blocks(css=custom_css) as demo:
1246
  fix_finish_crop = gr.Button(
1247
  value="Finish Croping & Brushing", interactive=False
1248
  )
1249
- gr.Markdown(
1250
- """<p style="text-align: left; font-size: 20px; font-weight: bold; ">OpenPose keypoints convention</p>"""
1251
- )
1252
- fix_openpose = gr.Image(
1253
- value="openpose.png",
1254
- type="numpy",
1255
- label="OpenPose keypoints convention",
1256
- show_label=True,
1257
- height=LENGTH // 3 * 2,
1258
- width=LENGTH // 3 * 2,
1259
- interactive=False,
1260
- )
1261
  with gr.Column():
1262
  gr.Markdown(
1263
- """<p style="text-align: center; font-size: 25px; font-weight: bold; ">2. Keypoint Selection</p>"""
1264
  )
1265
  gr.Markdown(
1266
- """<p style="text-align: center;">On the hand, select 21 keypoints that you hope the output to be. <br>Please see the \"OpenPose keypoints convention\" on the bottom left.</p>"""
1267
  )
1268
  fix_checkbox = gr.CheckboxGroup(
1269
  ["Right hand", "Left hand"],
1270
- # value=["Right hand", "Left hand"],
1271
- label="Hand side",
1272
- info="Which side this hand is? Could be both.",
1273
  interactive=False,
1274
  )
 
 
 
1275
  fix_kp_r_info = gr.Markdown(
1276
  """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
1277
  visible=False,
@@ -1314,22 +1610,24 @@ with gr.Blocks(css=custom_css) as demo:
1314
  fix_reset_left = gr.Button(
1315
  value="Reset", interactive=False, visible=False
1316
  )
1317
- with gr.Column():
1318
  gr.Markdown(
1319
- """<p style="text-align: center; font-size: 25px; font-weight: bold; ">3. Prepare Mask</p>"""
 
 
 
 
 
 
 
 
1320
  )
 
1321
  gr.Markdown(
1322
- """<p style="text-align: center;">In Fix Hands, not segmentation mask, but only inpaint mask is used.</p>"""
1323
  )
1324
  fix_ready = gr.Button(value="Ready", interactive=False)
1325
- fix_mask_size = gr.Radio(
1326
- ["256x256", "latent size (32x32)"],
1327
- label="Visualized inpaint mask size",
1328
- interactive=False,
1329
- value="256x256",
1330
- )
1331
  gr.Markdown(
1332
- """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Visualized inpaint masks</p>"""
1333
  )
1334
  fix_vis_mask32 = gr.Image(
1335
  type="numpy",
@@ -1342,20 +1640,22 @@ with gr.Blocks(css=custom_css) as demo:
1342
  )
1343
  fix_vis_mask256 = gr.Image(
1344
  type="numpy",
1345
- label=f"Visualized {opts.image_size} Inpaint Mask",
1346
  visible=True,
1347
- show_label=True,
1348
  height=opts.image_size,
1349
  width=opts.image_size,
1350
  interactive=False,
1351
  )
 
 
 
1352
  with gr.Column():
1353
  gr.Markdown(
1354
- """<p style="text-align: center; font-size: 25px; font-weight: bold; ">4. Results</p>"""
1355
  )
1356
  fix_run = gr.Button(value="Run", interactive=False)
1357
  gr.Markdown(
1358
- """<p style="text-align: center;">>3min and ~24GB per generation</p>"""
1359
  )
1360
  fix_result = gr.Gallery(
1361
  type="numpy",
@@ -1377,9 +1677,16 @@ with gr.Blocks(css=custom_css) as demo:
1377
  interactive=False,
1378
  preview=True,
1379
  )
 
 
 
1380
  fix_clear = gr.ClearButton()
 
 
 
 
1381
  gr.Markdown(
1382
- "[NOTE] Currently, Number of generation > 1 could lead to out-of-memory"
1383
  )
1384
  with gr.Row():
1385
  fix_n_generation = gr.Slider(
@@ -1422,8 +1729,6 @@ with gr.Blocks(css=custom_css) as demo:
1422
  fix_crop.change(resize_to_full, fix_crop, fix_ref)
1423
  fix_ref.change(enable_component, [fix_ref, fix_ref], fix_finish_crop)
1424
  fix_finish_crop.click(get_mask_inpaint, [fix_ref], [fix_inpaint_mask])
1425
- # fix_finish_crop.click(lambda x: x["background"], [fix_ref], [fix_kp_right])
1426
- # fix_finish_crop.click(lambda x: x["background"], [fix_ref], [fix_kp_left])
1427
  fix_finish_crop.click(lambda x: x["background"], [fix_crop], [fix_original])
1428
  fix_finish_crop.click(visualize_ref, [fix_crop, fix_ref], [fix_img])
1429
  fix_img.change(lambda x: x, [fix_img], [fix_kp_right])
@@ -1452,9 +1757,6 @@ with gr.Blocks(css=custom_css) as demo:
1452
  fix_inpaint_mask.change(
1453
  enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_ready
1454
  )
1455
- # fix_inpaint_mask.change(
1456
- # enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_run
1457
- # )
1458
  fix_checkbox.select(
1459
  set_visible,
1460
  [fix_checkbox, fix_kpts, fix_img, fix_kp_right, fix_kp_left],
@@ -1490,14 +1792,9 @@ with gr.Blocks(css=custom_css) as demo:
1490
  fix_reset_left.click(
1491
  reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
1492
  )
1493
- # fix_kpts.change(check_keypoints, [fix_kpts], [fix_kp_right, fix_kp_left, fix_run])
1494
- # fix_run.click(lambda x:gr.update(value=None), [], [fix_result, fix_result_pose])
1495
  fix_vis_mask32.change(
1496
  enable_component, [fix_vis_mask32, fix_vis_mask256], fix_run
1497
  )
1498
- fix_vis_mask32.change(
1499
- enable_component, [fix_vis_mask32, fix_vis_mask256], fix_mask_size
1500
- )
1501
  fix_ready.click(
1502
  ready_sample,
1503
  [fix_original, fix_inpaint_mask, fix_kpts],
@@ -1511,9 +1808,6 @@ with gr.Blocks(css=custom_css) as demo:
1511
  fix_vis_mask256,
1512
  ],
1513
  )
1514
- fix_mask_size.select(
1515
- switch_mask_size, [fix_mask_size], [fix_vis_mask32, fix_vis_mask256]
1516
- )
1517
  fix_run.click(
1518
  sample_inpaint,
1519
  [
@@ -1551,32 +1845,20 @@ with gr.Blocks(css=custom_css) as demo:
1551
  fix_latent,
1552
  fix_inpaint_latent,
1553
  fix_n_generation,
1554
- # fix_size_memory,
1555
  fix_seed,
1556
  fix_cfg,
1557
  fix_quality,
1558
  ],
1559
  )
1560
 
1561
- gr.Markdown("""<p style="font-size: 25px; font-weight: bold;">Examples</p>""")
1562
- fix_dump_ex = gr.Image(value=None, label="Original Image", visible=False)
1563
- fix_dump_ex_masked = gr.Image(value=None, label="After Brushing", visible=False)
1564
- with gr.Column():
1565
- fix_example = gr.Examples(
1566
- fix_example_imgs,
1567
- # run_on_click=True,
1568
- # fn=parse_fix_example,
1569
- # inputs=[fix_dump_ex, fix_dump_ex_masked],
1570
- # outputs=[fix_original, fix_ref, fix_img, fix_inpaint_mask],
1571
- inputs=[fix_crop],
1572
- examples_per_page=20,
1573
- )
1574
-
1575
  gr.Markdown("<h1>Citation</h1>")
 
 
 
1576
  gr.Markdown(_CITE_)
1577
 
1578
- # print("Ready to launch..")
1579
- # _, _, shared_url = demo.queue().launch(
1580
- # share=True, server_name="0.0.0.0", server_port=7739
1581
- # )
1582
- demo.launch(share=True)
 
266
  min_detection_confidence=0.1,
267
  )
268
 
269
+ def prepare_ref_anno(ref):
270
  if ref is None:
271
  return (
272
  None,
 
280
  img = ref["composite"][..., :3]
281
  img = cv2.resize(img, opts.image_size, interpolation=cv2.INTER_AREA)
282
  keypts = np.zeros((42, 2))
283
+ mp_pose = hands.process(img)
284
+ if mp_pose.multi_hand_landmarks:
285
+ # handedness is flipped assuming the input image is mirrored in MediaPipe
286
+ for hand_landmarks, handedness in zip(
287
+ mp_pose.multi_hand_landmarks, mp_pose.multi_handedness
288
+ ):
289
+ # actually right hand
290
+ if handedness.classification[0].label == "Left":
291
+ start_idx = 0
292
+ # actually left hand
293
+ elif handedness.classification[0].label == "Right":
294
+ start_idx = 21
295
+ for i, landmark in enumerate(hand_landmarks.landmark):
296
+ keypts[start_idx + i] = [
297
+ landmark.x * opts.image_size[1],
298
+ landmark.y * opts.image_size[0],
299
+ ]
300
+
301
+ print(f"keypts.max(): {keypts.max()}, keypts.min(): {keypts.min()}")
302
+ return img, keypts
303
+ else:
304
+ return img, None
305
+
306
+ def get_ref_anno(img, keypts):
307
+ if keypts is None:
308
+ no_hands = cv2.resize(np.array(Image.open("no_hands.png"))[..., :3], (LENGTH, LENGTH))
309
+ return None, no_hands, None
310
+ if isinstance(keypts, list):
311
+ if len(keypts[0]) == 0:
312
+ keypts[0] = np.zeros((21, 2))
313
+ elif len(keypts[0]) == 21:
314
+ keypts[0] = np.array(keypts[0], dtype=np.float32)
 
 
 
 
 
 
 
 
 
 
315
  else:
316
+ gr.Info("Number of right hand keypoints should be either 0 or 21.")
317
+ return None, None
318
+
319
+ if len(keypts[1]) == 0:
320
+ keypts[1] = np.zeros((21, 2))
321
+ elif len(keypts[1]) == 21:
322
+ keypts[1] = np.array(keypts[1], dtype=np.float32)
323
+ else:
324
+ gr.Info("Number of left hand keypoints should be either 0 or 21.")
325
+ return None, None
326
+
327
+ keypts = np.concatenate(keypts, axis=0)
328
+ if REF_POSE_MASK:
329
+ sam_predictor.set_image(img)
330
+ if keypts[0].sum() != 0 and keypts[21].sum() != 0:
331
+ input_point = np.array([keypts[0], keypts[21]])
332
+ input_label = np.array([1, 1])
333
+ elif keypts[0].sum() != 0:
334
+ input_point = np.array(keypts[:1])
335
+ input_label = np.array([1])
336
+ elif keypts[21].sum() != 0:
337
+ input_point = np.array(keypts[21:22])
338
+ input_label = np.array([1])
339
+ masks, _, _ = sam_predictor.predict(
340
+ point_coords=input_point,
341
+ point_labels=input_label,
342
+ multimask_output=False,
343
+ )
344
+ hand_mask = masks[0]
345
+ masked_img = img * hand_mask[..., None] + 255 * (1 - hand_mask[..., None])
346
+ ref_pose = visualize_hand(keypts, masked_img)
347
  else:
348
  hand_mask = np.zeros_like(img[:,:, 0])
349
  ref_pose = np.zeros_like(img)
 
 
350
  def make_ref_cond(
351
  img,
352
  keypts,
 
362
  Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
363
  ]
364
  )
365
+ image = image_transform(img) # .to(device)
366
  kpts_valid = check_keypoints_validity(keypts, target_size)
367
  heatmaps = torch.tensor(
368
  keypoint_heatmap(
 
370
  )
371
  * kpts_valid[:, None, None],
372
  dtype=torch.float,
373
+ # device=device
374
  )[None, ...]
375
  mask = torch.tensor(
376
  cv2.resize(
 
379
  interpolation=cv2.INTER_NEAREST,
380
  ),
381
  dtype=torch.float,
382
+ # device=device,
383
  ).unsqueeze(0)[None, ...]
384
  return image[None, ...], heatmaps, mask
385
 
 
586
  print(f"results[0].max(): {results[0].max()}")
587
  return results, results_pose
588
 
589
+ @spaces.GPU(duration=120)
590
  def ready_sample(img_ori, inpaint_mask, keypts):
591
  img = cv2.resize(img_ori[..., :3], opts.image_size, interpolation=cv2.INTER_AREA)
592
  sam_predictor.set_image(img)
 
608
 
609
  keypts = np.concatenate(keypts, axis=0)
610
  keypts = scale_keypoint(keypts, (LENGTH, LENGTH), opts.image_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
 
612
  box_shift_ratio = 0.5
613
  box_size_factor = 1.2
 
789
 
790
 
791
  def flip_hand(
792
+ img, pose_img, cond: Optional[torch.Tensor], keypts: Optional[torch.Tensor] = None, pose_manual_img = None,
793
+ manual_kp_right=None, manual_kp_left=None
794
  ):
795
  if cond is None: # clear clicked
796
  return None, None, None, None
 
806
  if keypts[21:, :].sum() != 0:
807
  keypts[21:, 0] = opts.image_size[1] - keypts[21:, 0]
808
  # keypts[21:, 1] = opts.image_size[0] - keypts[21:, 1]
809
+ if pose_manual_img is not None:
810
+ pose_manual_img = pose_manual_img[:, ::-1, :]
811
+ manual_kp_right = manual_kp_right[:, ::-1, :]
812
+ manual_kp_left = manual_kp_left[:, ::-1, :]
813
+ return img, pose_img, cond, keypts, pose_manual_img, manual_kp_right, manual_kp_left
814
 
815
 
816
  def resize_to_full(img):
 
822
 
823
  def clear_all():
824
  return (
825
+ None,
826
+ None,
827
+ None,
828
  None,
829
  None,
830
  False,
 
841
  1,
842
  42,
843
  3.0,
844
+ gr.update(interactive=False),
845
+ []
846
  )
847
 
848
 
 
893
  return gr.update(interactive=True)
894
 
895
 
896
+ def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left, done=None, done_info=None):
897
  if kpts is None:
898
  kpts = [[], []]
899
  if "Right hand" not in checkbox:
 
916
  update_left = gr.update(visible=True)
917
  update_l_info = gr.update(visible=True)
918
 
919
+ ret = [
920
  kpts,
921
  vis_right,
922
  vis_left,
 
928
  update_left,
929
  update_r_info,
930
  update_l_info,
931
+ ]
932
+ if done is not None:
933
+ if not checkbox:
934
+ ret.append(gr.update(visible=False))
935
+ ret.append(gr.update(visible=False))
936
+ else:
937
+ ret.append(gr.update(visible=True))
938
+ ret.append(gr.update(visible=True))
939
+ return tuple(ret)
940
+
941
+ def set_unvisible():
942
+ return (
943
+ gr.update(visible=False),
944
+ gr.update(visible=False),
945
+ gr.update(visible=False),
946
+ gr.update(visible=False),
947
+ gr.update(visible=False),
948
+ gr.update(visible=False),
949
+ gr.update(visible=False),
950
+ gr.update(visible=False),
951
+ gr.update(visible=False),
952
+ gr.update(visible=False),
953
+ gr.update(visible=False),
954
+ gr.update(visible=False)
955
  )
956
 
957
+ def set_no_hands(decider, component):
958
+ if decider is None:
959
+ no_hands = cv2.resize(np.array(Image.open("no_hands.png"))[..., :3], (LENGTH, LENGTH))
960
+ return no_hands
961
+ else:
962
+ return component
963
+
964
+ # def visible_component(decider, component):
965
+ # if decider is not None:
966
+ # update_component = gr.update(visible=True)
967
+ # else:
968
+ # update_component = gr.update(visible=False)
969
+ # return update_component
970
+
971
+ def unvisible_component(decider, component):
972
+ if decider is not None:
973
+ update_component = gr.update(visible=False)
974
+ else:
975
+ update_component = gr.update(visible=True)
976
+ return update_component
977
+
978
+ def make_change(decider, state):
979
+ '''
980
+ if decider is not None, change the state's value. True/False does not matter.
981
+ '''
982
+ if decider is not None:
983
+ if state:
984
+ state = False
985
+ else:
986
+ state = True
987
+ return state
988
+ else:
989
+ return state
990
 
991
  LENGTH = 480
992
 
993
+ example_ref_imgs = [
994
  [
995
  "sample_images/sample1.jpg",
996
  ],
 
1003
  [
1004
  "sample_images/sample4.jpg",
1005
  ],
1006
+ # [
1007
+ # "sample_images/sample5.jpg",
1008
+ # ],
1009
  [
1010
  "sample_images/sample6.jpg",
1011
  ],
1012
+ # [
1013
+ # "sample_images/sample7.jpg",
1014
+ # ],
1015
+ # [
1016
+ # "sample_images/sample8.jpg",
1017
+ # ],
1018
+ # [
1019
+ # "sample_images/sample9.jpg",
1020
+ # ],
1021
+ # [
1022
+ # "sample_images/sample10.jpg",
1023
+ # ],
1024
+ # [
1025
+ # "sample_images/sample11.jpg",
1026
+ # ],
1027
+ # ["pose_images/pose1.jpg"],
1028
+ # ["pose_images/pose2.jpg"],
1029
+ # ["pose_images/pose3.jpg"],
1030
+ # ["pose_images/pose4.jpg"],
1031
+ # ["pose_images/pose5.jpg"],
1032
+ # ["pose_images/pose6.jpg"],
1033
+ # ["pose_images/pose7.jpg"],
1034
+ # ["pose_images/pose8.jpg"],
1035
+ ]
1036
+ example_target_imgs = [
1037
+ # [
1038
+ # "sample_images/sample1.jpg",
1039
+ # ],
1040
+ # [
1041
+ # "sample_images/sample2.jpg",
1042
+ # ],
1043
+ # [
1044
+ # "sample_images/sample3.jpg",
1045
+ # ],
1046
+ # [
1047
+ # "sample_images/sample4.jpg",
1048
+ # ],
1049
  [
1050
+ "sample_images/sample5.jpg",
 
 
 
1051
  ],
1052
+ # [
1053
+ # "sample_images/sample6.jpg",
1054
+ # ],
1055
+ # [
1056
+ # "sample_images/sample7.jpg",
1057
+ # ],
1058
+ # [
1059
+ # "sample_images/sample8.jpg",
1060
+ # ],
1061
  [
1062
  "sample_images/sample9.jpg",
1063
  ],
 
1068
  "sample_images/sample11.jpg",
1069
  ],
1070
  ["pose_images/pose1.jpg"],
1071
+ # ["pose_images/pose2.jpg"],
1072
+ # ["pose_images/pose3.jpg"],
1073
+ # ["pose_images/pose4.jpg"],
1074
+ # ["pose_images/pose5.jpg"],
1075
+ # ["pose_images/pose6.jpg"],
1076
+ # ["pose_images/pose7.jpg"],
1077
+ # ["pose_images/pose8.jpg"],
1078
  ]
 
1079
  fix_example_imgs = [
1080
  ["bad_hands/1.jpg"], # "bad_hands/1_mask.jpg"],
1081
+ # ["bad_hands/2.jpg"], # "bad_hands/2_mask.jpg"],
1082
  ["bad_hands/3.jpg"], # "bad_hands/3_mask.jpg"],
1083
+ # ["bad_hands/4.jpg"], # "bad_hands/4_mask.jpg"],
1084
  ["bad_hands/5.jpg"], # "bad_hands/5_mask.jpg"],
1085
  ["bad_hands/6.jpg"], # "bad_hands/6_mask.jpg"],
1086
  ["bad_hands/7.jpg"], # "bad_hands/7_mask.jpg"],
1087
+ # ["bad_hands/8.jpg"], # "bad_hands/8_mask.jpg"],
1088
+ # ["bad_hands/9.jpg"], # "bad_hands/9_mask.jpg"],
1089
+ # ["bad_hands/10.jpg"], # "bad_hands/10_mask.jpg"],
1090
+ # ["bad_hands/11.jpg"], # "bad_hands/11_mask.jpg"],
1091
+ # ["bad_hands/12.jpg"], # "bad_hands/12_mask.jpg"],
1092
+ # ["bad_hands/13.jpg"], # "bad_hands/13_mask.jpg"],
1093
+ ["bad_hands/14.jpg"],
1094
+ ["bad_hands/15.jpg"],
1095
  ]
1096
  custom_css = """
1097
  .gradio-container .examples img {
 
1101
  """
1102
 
1103
  _HEADER_ = '''
1104
+ <div style="text-align: center;">
1105
+ <h1><b>FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation</b></h1>
1106
+ <h2 style="color: #777777;">CVPR 2025</h2>
1107
+ <style>
1108
+ .link-spacing {
1109
+ margin-right: 20px;
1110
+ }
1111
+ </style>
1112
+ <p style="font-size: 15px;">
1113
+ <span style="display: inline-block; margin-right: 30px;">Brown University</span>
1114
+ <span style="display: inline-block;">Meta Reality Labs</span>
1115
+ </p>
1116
+ <h3>
1117
+ <a href='https://arxiv.org/abs/2412.02690' target='_blank' class="link-spacing">Paper</a>
1118
+ <a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank' class="link-spacing">Project Page</a>
1119
+ <a href='' target='_blank' class="link-spacing">Code</a>
1120
+ <a href='' target='_blank'>Model Weights</a>
1121
+ </h3>
1122
+ <p>Below are two important abilities of our model. First, we can <b>edit hand poses</b> given two hand images - one is the image to edit, and the other one provides target hand pose. Second, we can automatically <b>fix malformed hand images</b>, following the user-provided target hand pose and area to fix.</p>
1123
+ </div>
1124
  '''
1125
 
1126
  _CITE_ = r"""
 
1134
  ```
1135
  """
1136
 
1137
+ with gr.Blocks(css=custom_css, theme="soft") as demo:
1138
  gr.Markdown(_HEADER_)
1139
  with gr.Tab("Edit Hand Poses"):
1140
  ref_img = gr.State(value=None)
1141
+ ref_im_raw = gr.State(value=None)
1142
+ ref_kp_raw = gr.State(value=0)
1143
+ ref_kp_got = gr.State(value=None)
1144
+ dump = gr.State(value=None)
1145
  ref_cond = gr.State(value=None)
1146
+ ref_manual_cond = gr.State(value=None)
1147
+ ref_auto_cond = gr.State(value=None)
1148
  keypts = gr.State(value=None)
1149
  target_img = gr.State(value=None)
1150
  target_cond = gr.State(value=None)
 
1153
  with gr.Row():
1154
  with gr.Column():
1155
  gr.Markdown(
1156
+ """<p style="text-align: center; font-size: 20px; font-weight: bold;">1. Upload a hand image to edit 📥</p>"""
1157
+ )
1158
+ gr.Markdown(
1159
+ """<p style="text-align: center;">&#9312; Optionally crop the image</p>"""
1160
  )
 
1161
  ref = gr.ImageEditor(
1162
  type="numpy",
1163
  label="Reference",
 
1168
  layers=False,
1169
  crop_size="1:1",
1170
  )
1171
+ gr.Examples(example_ref_imgs, [ref], examples_per_page=20)
1172
+ gr.Markdown(
1173
+ """<p style="text-align: center;">&#9313; Hit the &quot;Finish Cropping&quot; button to get hand pose</p>"""
1174
+ )
1175
  ref_finish_crop = gr.Button(value="Finish Cropping", interactive=False)
1176
+ with gr.Tab("Automatic hand keypoints"):
1177
+ ref_pose = gr.Image(
1178
+ type="numpy",
1179
+ label="Reference Pose",
1180
+ show_label=True,
1181
+ height=LENGTH,
1182
+ width=LENGTH,
1183
+ interactive=False,
1184
+ )
1185
+ ref_use_auto = gr.Button(value="Click here to use automatic, not manual", interactive=False, visible=True)
1186
+ with gr.Tab("Manual hand keypoints"):
1187
+ ref_manual_checkbox_info = gr.Markdown(
1188
+ """<p style="text-align: center;"><b>Step 1.</b> Tell us if this is right, left, or both hands.</p>""",
1189
+ visible=True,
1190
+ )
1191
+ ref_manual_checkbox = gr.CheckboxGroup(
1192
+ ["Right hand", "Left hand"],
1193
+ show_label=False,
1194
+ visible=True,
1195
+ interactive=True,
1196
+ )
1197
+ ref_manual_kp_r_info = gr.Markdown(
1198
+ """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>right</b> hand. See \"OpenPose Keypoint Convention\" for guidance.</p>""",
1199
+ visible=False,
1200
+ )
1201
+ ref_manual_kp_right = gr.Image(
1202
+ type="numpy",
1203
+ label="Keypoint Selection (right hand)",
1204
+ show_label=True,
1205
+ height=LENGTH,
1206
+ width=LENGTH,
1207
+ interactive=False,
1208
+ visible=False,
1209
+ sources=[],
1210
+ )
1211
+ with gr.Row():
1212
+ ref_manual_undo_right = gr.Button(
1213
+ value="Undo", interactive=True, visible=False
1214
+ )
1215
+ ref_manual_reset_right = gr.Button(
1216
+ value="Reset", interactive=True, visible=False
1217
+ )
1218
+ ref_manual_kp_l_info = gr.Markdown(
1219
+ """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>left</b> hand. See \"OpenPose keypoint convention\" for guidance.</p>""",
1220
+ visible=False
1221
+ )
1222
+ ref_manual_kp_left = gr.Image(
1223
+ type="numpy",
1224
+ label="Keypoint Selection (left hand)",
1225
+ show_label=True,
1226
+ height=LENGTH,
1227
+ width=LENGTH,
1228
+ interactive=False,
1229
+ visible=False,
1230
+ sources=[],
1231
+ )
1232
+ with gr.Row():
1233
+ ref_manual_undo_left = gr.Button(
1234
+ value="Undo", interactive=True, visible=False
1235
+ )
1236
+ ref_manual_reset_left = gr.Button(
1237
+ value="Reset", interactive=True, visible=False
1238
+ )
1239
+ ref_manual_done_info = gr.Markdown(
1240
+ """<p style="text-align: center;"><b>Step 3.</b> Hit \"Done\" button to confirm.</p>""",
1241
+ visible=False,
1242
+ )
1243
+ ref_manual_done = gr.Button(value="Done", interactive=True, visible=False)
1244
+ ref_manual_pose = gr.Image(
1245
+ type="numpy",
1246
+ label="Reference Pose",
1247
+ show_label=True,
1248
+ height=LENGTH,
1249
+ width=LENGTH,
1250
+ interactive=False,
1251
+ visible=False
1252
+ )
1253
+ ref_use_manual = gr.Button(value="Click here to use manual, not automatic", interactive=True, visible=False)
1254
+ ref_manual_instruct = gr.Markdown(
1255
+ value="""<p style="text-align: left; font-weight: bold; ">OpenPose Keypoints Convention</p>""",
1256
+ visible=True
1257
+ )
1258
+ ref_manual_openpose = gr.Image(
1259
+ value="openpose.png",
1260
+ type="numpy",
1261
+ show_label=False,
1262
+ height=LENGTH // 2,
1263
+ width=LENGTH // 2,
1264
+ interactive=False,
1265
+ visible=True
1266
+ )
1267
+ gr.Markdown(
1268
+ """<p style="text-align: center;">&#9314; Optionally flip the hand</p>"""
1269
  )
1270
  ref_flip = gr.Checkbox(
1271
  value=False, label="Flip Handedness (Reference)", interactive=False
1272
  )
1273
  with gr.Column():
1274
  gr.Markdown(
1275
+ """<p style="text-align: center; font-size: 20px; font-weight: bold;">2. Upload a hand image for target hand pose 📥</p>"""
1276
+ )
1277
+ gr.Markdown(
1278
+ """<p style="text-align: center;">&#9312; Optionally crop the image</p>"""
1279
  )
1280
  target = gr.ImageEditor(
1281
  type="numpy",
 
1287
  layers=False,
1288
  crop_size="1:1",
1289
  )
1290
+ gr.Examples(example_target_imgs, [target], examples_per_page=20)
1291
+ gr.Markdown(
1292
+ """<p style="text-align: center;">&#9313; Hit the &quot;Finish Cropping&quot; button to get hand pose</p>"""
1293
+ )
1294
  target_finish_crop = gr.Button(
1295
  value="Finish Cropping", interactive=False
1296
  )
 
1302
  width=LENGTH,
1303
  interactive=False,
1304
  )
1305
+ gr.Markdown(
1306
+ """<p style="text-align: center;">&#9314; Optionally flip the hand</p>"""
1307
+ )
1308
  target_flip = gr.Checkbox(
1309
  value=False, label="Flip Handedness (Target)", interactive=False
1310
  )
1311
  with gr.Column():
1312
  gr.Markdown(
1313
+ """<p style="text-align: center; font-size: 20px; font-weight: bold;">3. Press &quot;Run&quot; to get the edited results 🎯</p>"""
 
 
 
1314
  )
1315
  run = gr.Button(value="Run", interactive=False)
1316
  gr.Markdown(
1317
+ """<p style="text-align: center;">⚠️ ~20s per generation with RTX3090. ~50s with A100. <br>(For example, if you set Number of generations as 2, it would take around 40s)</p>"""
1318
  )
1319
  results = gr.Gallery(
1320
  type="numpy",
 
1336
  interactive=False,
1337
  preview=True,
1338
  )
1339
+ gr.Markdown(
1340
+ """<p style="text-align: center;">✨ Hit &quot;Clear&quot; to restart from the beginning</p>"""
1341
+ )
1342
  clear = gr.ClearButton()
1343
 
1344
+ with gr.Tab("More options"):
1345
+ with gr.Row():
1346
+ n_generation = gr.Slider(
1347
+ label="Number of generations",
1348
+ value=1,
1349
+ minimum=1,
1350
+ maximum=MAX_N,
1351
+ step=1,
1352
+ randomize=False,
1353
+ interactive=True,
1354
+ )
1355
+ seed = gr.Slider(
1356
+ label="Seed",
1357
+ value=42,
1358
+ minimum=0,
1359
+ maximum=10000,
1360
+ step=1,
1361
+ randomize=False,
1362
+ interactive=True,
1363
+ )
1364
+ cfg = gr.Slider(
1365
+ label="Classifier free guidance scale",
1366
+ value=2.5,
1367
+ minimum=0.0,
1368
+ maximum=10.0,
1369
+ step=0.1,
1370
+ randomize=False,
1371
+ interactive=True,
1372
+ )
1373
 
1374
  ref.change(enable_component, [ref, ref], ref_finish_crop)
1375
+ ref_finish_crop.click(prepare_ref_anno, [ref], [ref_im_raw, ref_kp_raw])
1376
+ ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_right)
1377
+ ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_left)
1378
+ ref_manual_checkbox.select(
1379
+ set_visible,
1380
+ [ref_manual_checkbox, ref_kp_got, ref_im_raw, ref_manual_kp_right, ref_manual_kp_left, ref_manual_done],
1381
+ [
1382
+ ref_kp_got,
1383
+ ref_manual_kp_right,
1384
+ ref_manual_kp_left,
1385
+ ref_manual_kp_right,
1386
+ ref_manual_undo_right,
1387
+ ref_manual_reset_right,
1388
+ ref_manual_kp_left,
1389
+ ref_manual_undo_left,
1390
+ ref_manual_reset_left,
1391
+ ref_manual_kp_r_info,
1392
+ ref_manual_kp_l_info,
1393
+ ref_manual_done,
1394
+ ref_manual_done_info
1395
+ ]
1396
+ )
1397
+ ref_manual_kp_right.select(
1398
+ get_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
1399
+ )
1400
+ ref_manual_undo_right.click(
1401
+ undo_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
1402
+ )
1403
+ ref_manual_reset_right.click(
1404
+ reset_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
1405
+ )
1406
+ ref_manual_kp_left.select(
1407
+ get_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
1408
+ )
1409
+ ref_manual_undo_left.click(
1410
+ undo_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
1411
+ )
1412
+ ref_manual_reset_left.click(
1413
+ reset_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
1414
+ )
1415
+ ref_manual_done.click(get_ref_anno, [ref_im_raw, ref_kp_got], [ref_img, ref_manual_pose, ref_manual_cond])
1416
+ ref_manual_cond.change(lambda x: x, ref_manual_cond, ref_cond)
1417
+ ref_use_manual.click(lambda x: x, ref_manual_cond, ref_cond)
1418
+ ref_use_manual.click(lambda: gr.Info("Manual hand keypoints will be used for 'Reference'", duration=3))  # no inputs are wired to this listener, so the callback must take zero arguments
1419
+ ref_manual_done.click(lambda x: gr.update(visible=True), ref_manual_pose, ref_manual_pose)
1420
+ ref_manual_done.click(lambda x: gr.update(visible=True), ref_use_manual, ref_use_manual)
1421
+ ref_manual_pose.change(enable_component, [ref_manual_pose, ref_manual_pose], ref_manual_done)
1422
+ ref_kp_raw.change(get_ref_anno, [ref_im_raw, ref_kp_raw], [ref_img, ref_pose, ref_auto_cond])
1423
+ ref_auto_cond.change(lambda x: x, ref_auto_cond, ref_cond)
1424
+ ref_use_auto.click(lambda x: x, ref_auto_cond, ref_cond)
1425
+ ref_use_auto.click(lambda: gr.Info("Automatic hand keypoints will be used for 'Reference'", duration=3))  # no inputs are wired to this listener, so the callback must take zero arguments
1426
+ ref_pose.change(enable_component, [ref_kp_raw, ref_pose], ref_use_auto)
1427
  ref_pose.change(enable_component, [ref_img, ref_pose], ref_flip)
1428
+ ref_manual_pose.change(enable_component, [ref_img, ref_manual_pose], ref_flip)
1429
  ref_flip.select(
1430
+ flip_hand, [ref, ref_pose, ref_cond, gr.State(value=None), ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left], [ref, ref_pose, ref_cond, dump, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left]
1431
  )
1432
  target.change(enable_component, [target, target], target_finish_crop)
1433
  target_finish_crop.click(
 
1442
  [target, target_pose, target_cond, target_keypts],
1443
  )
1444
  ref_pose.change(enable_component, [ref_pose, target_pose], run)
1445
+ ref_manual_pose.change(enable_component, [ref_manual_pose, target_pose], run)
1446
  target_pose.change(enable_component, [ref_pose, target_pose], run)
1447
  run.click(
1448
  sample_diff,
 
1454
  [],
1455
  [
1456
  ref,
1457
+ ref_manual_kp_right,
1458
+ ref_manual_kp_left,
1459
  ref_pose,
1460
+ ref_manual_pose,
1461
  ref_flip,
1462
  target,
1463
  target_pose,
 
1466
  results_pose,
1467
  ref_img,
1468
  ref_cond,
 
1469
  target_img,
1470
  target_cond,
1471
  target_keypts,
1472
  n_generation,
1473
  seed,
1474
  cfg,
1475
+ ref_kp_raw,
1476
+ ref_manual_checkbox
1477
  ],
1478
  )
1479
+ clear.click(
1480
+ set_unvisible,
1481
+ [],
1482
+ [
1483
+ ref_manual_kp_r_info,
1484
+ ref_manual_kp_l_info,
1485
+ ref_manual_undo_left,
1486
+ ref_manual_undo_right,
1487
+ ref_manual_reset_left,
1488
+ ref_manual_reset_right,
1489
+ ref_manual_done,
1490
+ ref_manual_done_info,
1491
+ ref_manual_pose,
1492
+ ref_use_manual,
1493
+ ref_manual_kp_right,
1494
+ ref_manual_kp_left
1495
+ ]
1496
+ )
1497
 
 
 
 
 
 
 
 
1498
  with gr.Tab("Fix Hands"):
1499
  fix_inpaint_mask = gr.State(value=None)
1500
  fix_original = gr.State(value=None)
 
1505
  fix_target_cond = gr.State(value=None)
1506
  fix_latent = gr.State(value=None)
1507
  fix_inpaint_latent = gr.State(value=None)
 
 
 
1508
  with gr.Row():
1509
  with gr.Column():
1510
  gr.Markdown(
1511
+ """<p style="text-align: center; font-size: 20px; font-weight: bold;">1. Upload a malformed hand image to fix 📥</p>"""
 
 
 
1512
  )
1513
  gr.Markdown(
1514
+ """<p style="text-align: center;">&#9312; Optionally crop the image around the hand</p>"""
1515
  )
1516
  fix_crop = gr.ImageEditor(
1517
  type="numpy",
 
1526
  image_mode="RGBA",
1527
  container=False,
1528
  )
1529
+ fix_example = gr.Examples(
1530
+ fix_example_imgs,
1531
+ inputs=[fix_crop],
1532
+ examples_per_page=20,
1533
+ )
1534
  gr.Markdown(
1535
+ """<p style="text-align: center;">&#9313; Brush area (e.g., wrong finger) that needs to be fixed. This will serve as an inpaint mask</p>"""
1536
  )
1537
  fix_ref = gr.ImageEditor(
1538
  type="numpy",
 
1553
  fix_finish_crop = gr.Button(
1554
  value="Finish Croping & Brushing", interactive=False
1555
  )
 
 
 
 
 
 
 
 
 
 
 
 
1556
  with gr.Column():
1557
  gr.Markdown(
1558
+ """<p style="text-align: center; font-size: 20px; font-weight: bold;">2. Click on hand to get target hand pose</p>"""
1559
  )
1560
  gr.Markdown(
1561
+ """<p style="text-align: center;">&#9312; Tell us if this is right, left, or both hands</p>"""
1562
  )
1563
  fix_checkbox = gr.CheckboxGroup(
1564
  ["Right hand", "Left hand"],
1565
+ show_label=False,
 
 
1566
  interactive=False,
1567
  )
1568
+ gr.Markdown(
1569
+ """<p style="text-align: center;">&#9313; On the image, click 21 hand keypoints. This will serve as target hand poses. See the \"OpenPose keypoints convention\" for guidance.</p>"""
1570
+ )
1571
  fix_kp_r_info = gr.Markdown(
1572
  """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
1573
  visible=False,
 
1610
  fix_reset_left = gr.Button(
1611
  value="Reset", interactive=False, visible=False
1612
  )
 
1613
  gr.Markdown(
1614
+ """<p style="text-align: left; font-weight: bold; ">OpenPose keypoints convention</p>"""
1615
+ )
1616
+ fix_openpose = gr.Image(
1617
+ value="openpose.png",
1618
+ type="numpy",
1619
+ show_label=False,
1620
+ height=LENGTH // 2,
1621
+ width=LENGTH // 2,
1622
+ interactive=False,
1623
  )
1624
+ with gr.Column():
1625
  gr.Markdown(
1626
+ """<p style="text-align: center; font-size: 20px; font-weight: bold;">3. Press &quot;Ready&quot; to start pre-processing</p>"""
1627
  )
1628
  fix_ready = gr.Button(value="Ready", interactive=False)
 
 
 
 
 
 
1629
  gr.Markdown(
1630
+ """<p style="text-align: center; font-weight: bold; ">Visualized (256, 256) Inpaint Mask</p>"""
1631
  )
1632
  fix_vis_mask32 = gr.Image(
1633
  type="numpy",
 
1640
  )
1641
  fix_vis_mask256 = gr.Image(
1642
  type="numpy",
 
1643
  visible=True,
1644
+ show_label=False,
1645
  height=opts.image_size,
1646
  width=opts.image_size,
1647
  interactive=False,
1648
  )
1649
+ gr.Markdown(
1650
+ """<p style="text-align: center;">[NOTE] Above should be inpaint mask that you brushed, NOT the segmentation mask of the entire hand. </p>"""
1651
+ )
1652
  with gr.Column():
1653
  gr.Markdown(
1654
+ """<p style="text-align: center; font-size: 20px; font-weight: bold;">4. Press &quot;Run&quot; to get the fixed hand image 🎯</p>"""
1655
  )
1656
  fix_run = gr.Button(value="Run", interactive=False)
1657
  gr.Markdown(
1658
+ """<p style="text-align: center;">⚠️ >3min and ~24GB per generation</p>"""
1659
  )
1660
  fix_result = gr.Gallery(
1661
  type="numpy",
 
1677
  interactive=False,
1678
  preview=True,
1679
  )
1680
+ gr.Markdown(
1681
+ """<p style="text-align: center;">✨ Hit &quot;Clear&quot; to restart from the beginning</p>"""
1682
+ )
1683
  fix_clear = gr.ClearButton()
1684
+
1685
+ gr.Markdown(
1686
+ """<p style="text-align: left; font-size: 25px;"><b>More options</b></p>"""
1687
+ )
1688
  gr.Markdown(
1689
+ "⚠️ Currently, Number of generation > 1 could lead to out-of-memory"
1690
  )
1691
  with gr.Row():
1692
  fix_n_generation = gr.Slider(
 
1729
  fix_crop.change(resize_to_full, fix_crop, fix_ref)
1730
  fix_ref.change(enable_component, [fix_ref, fix_ref], fix_finish_crop)
1731
  fix_finish_crop.click(get_mask_inpaint, [fix_ref], [fix_inpaint_mask])
 
 
1732
  fix_finish_crop.click(lambda x: x["background"], [fix_crop], [fix_original])
1733
  fix_finish_crop.click(visualize_ref, [fix_crop, fix_ref], [fix_img])
1734
  fix_img.change(lambda x: x, [fix_img], [fix_kp_right])
 
1757
  fix_inpaint_mask.change(
1758
  enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_ready
1759
  )
 
 
 
1760
  fix_checkbox.select(
1761
  set_visible,
1762
  [fix_checkbox, fix_kpts, fix_img, fix_kp_right, fix_kp_left],
 
1792
  fix_reset_left.click(
1793
  reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
1794
  )
 
 
1795
  fix_vis_mask32.change(
1796
  enable_component, [fix_vis_mask32, fix_vis_mask256], fix_run
1797
  )
 
 
 
1798
  fix_ready.click(
1799
  ready_sample,
1800
  [fix_original, fix_inpaint_mask, fix_kpts],
 
1808
  fix_vis_mask256,
1809
  ],
1810
  )
 
 
 
1811
  fix_run.click(
1812
  sample_inpaint,
1813
  [
 
1845
  fix_latent,
1846
  fix_inpaint_latent,
1847
  fix_n_generation,
 
1848
  fix_seed,
1849
  fix_cfg,
1850
  fix_quality,
1851
  ],
1852
  )
1853
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1854
  gr.Markdown("<h1>Citation</h1>")
1855
+ gr.Markdown(
1856
+ """<p style="text-align: left;">If this was useful, please cite us! ❤️</p>"""
1857
+ )
1858
  gr.Markdown(_CITE_)
1859
 
1860
+ print("Ready to launch..")
1861
+ _, _, shared_url = demo.queue().launch(
1862
+ share=True, server_name="0.0.0.0", server_port=7739
1863
+ )
1864
+ # demo.launch(share=True)
bad_hands/14.jpg ADDED

Git LFS Details

  • SHA256: f9dcd7eaf94c6f0d8ed1ed0f1c8cb500ad91ccd47a766c2363bbc845d6ae61d2
  • Pointer size: 131 Bytes
  • Size of remote file: 190 kB
bad_hands/15.jpg ADDED

Git LFS Details

  • SHA256: 92dfa5ee3db99ab7c9bbd7fe88d254ddb988b438d6092210d3bd31971fa29238
  • Pointer size: 130 Bytes
  • Size of remote file: 44.4 kB