chaerinmin committed on
Commit
33b5165
·
1 Parent(s): 3ceda9e

code cleanup, fixhand examples autoload, change to youtube

Browse files
Files changed (2) hide show
  1. .gitignore +1 -1
  2. app.py +121 -199
.gitignore CHANGED
@@ -1,4 +1,4 @@
1
- settings.json
2
  sbatch/err/
3
  sbatch/out/
4
  __pycache__/
 
1
+ .vscode/
2
  sbatch/err/
3
  sbatch/out/
4
  __pycache__/
app.py CHANGED
@@ -473,13 +473,10 @@ def visualize_ref(ref):
473
  img = ref["background"][..., :3]
474
 
475
  # viualization
476
- # img = cv2.resize(img, inpainted.shape[::-1], interpolation=cv2.INTER_AREA)
477
  mask = inpainted < 128
478
- # img = img.astype(np.int32)
479
- # img[mask, :] = img[mask, :] - 50
480
- # img[np.any(img<0, axis=-1)]=0
481
- # img = img.astype(np.uint8)
482
  img = mask_image(img, mask)
 
 
483
  return img, inpaint_mask
484
 
485
  def get_kps(img, keypoints, side: Literal["right", "left"], evt: gr.SelectData):
@@ -581,11 +578,6 @@ def process_crop(img, crop_coord, evt:gr.SelectData):
581
  cropped_vis[:,:,-1] = alpha
582
  else:
583
  gr.Error("Something is wrong", duration=3)
584
- # cropped_editor = {
585
- # "background": cropped,
586
- # "composite": cropped,
587
- # "layers": [cropped_vis]
588
- # }
589
  return crop_coord, cropped, cropped_vis
590
 
591
  def disable_crop(crop_coord):
@@ -653,8 +645,6 @@ def ready_sample(img_cropped, inpaint_mask, keypts, keypts_np):
653
  keypts[0] = np.zeros((21, 2))
654
  elif len(keypts[0]) == 21:
655
  keypts[0] = np.array(keypts[0], dtype=np.float32)
656
- # keypts[0][:, 0] = keypts[0][:, 0] + crop_coord[0][0]
657
- # keypts[0][:, 1] = keypts[0][:, 1] + crop_coord[0][1]
658
  else:
659
  gr.Info("Number of right hand keypoints should be either 0 or 21.")
660
  return None, None
@@ -662,8 +652,6 @@ def ready_sample(img_cropped, inpaint_mask, keypts, keypts_np):
662
  keypts[1] = np.zeros((21, 2))
663
  elif len(keypts[1]) == 21:
664
  keypts[1] = np.array(keypts[1], dtype=np.float32)
665
- # keypts[1][:, 0] = keypts[1][:, 0] + crop_coord[0][0]
666
- # keypts[1][:, 1] = keypts[1][:, 1] + crop_coord[0][1]
667
  else:
668
  gr.Info("Number of left hand keypoints should be either 0 or 21.")
669
  return None, None
@@ -868,13 +856,7 @@ def flip_hand(
868
  return
869
  img["composite"] = img["composite"][:, ::-1, :]
870
  img["background"] = img["background"][:, ::-1, :]
871
- img["layers"] = [layer[:, ::-1, :] for layer in img["layers"]]
872
- # for comp in [pose_img, pose_manual_img, manual_kp_right, manual_kp_left, cond, auto_cond, manual_cond]:
873
- # if comp is not None:
874
- # if isinstance(comp, torch.Tensor):
875
- # comp = comp.flip(-1)
876
- # else:
877
- # comp = comp[:, ::-1, :]
878
  if img_raw is not None:
879
  img_raw = img_raw[:, ::-1, :]
880
  pose_img = pose_img[:, ::-1, :]
@@ -889,12 +871,6 @@ def flip_hand(
889
  auto_cond = auto_cond.flip(-1)
890
  if manual_cond is not None:
891
  manual_cond = manual_cond.flip(-1)
892
- # for comp in [keypts, auto_keypts, manual_keypts]:
893
- # if comp is not None:
894
- # if comp[:21, :].sum() != 0:
895
- # comp[:21, 0] = opts.image_size[1] - comp[:21, 0]
896
- # if comp[21:, :].sum() != 0:
897
- # comp[21:, 0] = opts.image_size[1] - comp[21:, 0]
898
  if keypts is not None:
899
  if keypts[:21, :].sum() != 0:
900
  keypts[:21, 0] = opts.image_size[1] - keypts[:21, 0]
@@ -980,6 +956,10 @@ def fix_clear_all():
980
  def enable_component(image1, image2):
981
  if image1 is None or image2 is None:
982
  return gr.update(interactive=False)
 
 
 
 
983
  if isinstance(image1, dict) and "background" in image1 and "layers" in image1 and "composite" in image1:
984
  if image1["background"] is None or (
985
  image1["background"].sum() == 0
@@ -1109,61 +1089,14 @@ example_ref_imgs = [
1109
  [
1110
  "sample_images/sample4.jpg",
1111
  ],
1112
- # [
1113
- # "sample_images/sample5.jpg",
1114
- # ],
1115
  [
1116
  "sample_images/sample6.jpg",
1117
  ],
1118
- # [
1119
- # "sample_images/sample7.jpg",
1120
- # ],
1121
- # [
1122
- # "sample_images/sample8.jpg",
1123
- # ],
1124
- # [
1125
- # "sample_images/sample9.jpg",
1126
- # ],
1127
- # [
1128
- # "sample_images/sample10.jpg",
1129
- # ],
1130
- # [
1131
- # "sample_images/sample11.jpg",
1132
- # ],
1133
- # ["pose_images/pose1.jpg"],
1134
- # ["pose_images/pose2.jpg"],
1135
- # ["pose_images/pose3.jpg"],
1136
- # ["pose_images/pose4.jpg"],
1137
- # ["pose_images/pose5.jpg"],
1138
- # ["pose_images/pose6.jpg"],
1139
- # ["pose_images/pose7.jpg"],
1140
- # ["pose_images/pose8.jpg"],
1141
  ]
1142
  example_target_imgs = [
1143
- # [
1144
- # "sample_images/sample1.jpg",
1145
- # ],
1146
- # [
1147
- # "sample_images/sample2.jpg",
1148
- # ],
1149
- # [
1150
- # "sample_images/sample3.jpg",
1151
- # ],
1152
- # [
1153
- # "sample_images/sample4.jpg",
1154
- # ],
1155
  [
1156
  "sample_images/sample5.jpg",
1157
  ],
1158
- # [
1159
- # "sample_images/sample6.jpg",
1160
- # ],
1161
- # [
1162
- # "sample_images/sample7.jpg",
1163
- # ],
1164
- # [
1165
- # "sample_images/sample8.jpg",
1166
- # ],
1167
  [
1168
  "sample_images/sample9.jpg",
1169
  ],
@@ -1174,40 +1107,22 @@ example_target_imgs = [
1174
  "sample_images/sample11.jpg",
1175
  ],
1176
  ["pose_images/pose1.jpg"],
1177
- # ["pose_images/pose2.jpg"],
1178
- # ["pose_images/pose3.jpg"],
1179
- # ["pose_images/pose4.jpg"],
1180
- # ["pose_images/pose5.jpg"],
1181
- # ["pose_images/pose6.jpg"],
1182
- # ["pose_images/pose7.jpg"],
1183
- # ["pose_images/pose8.jpg"],
1184
  ]
1185
  fix_example_imgs = [
1186
- ["bad_hands/1.jpg"], # "bad_hands/1_mask.jpg"],
1187
- # ["bad_hands/2.jpg"], # "bad_hands/2_mask.jpg"],
1188
- ["bad_hands/3.jpg"], # "bad_hands/3_mask.jpg"],
1189
- ["bad_hands/4.jpg"], # "bad_hands/4_mask.jpg"],
1190
- ["bad_hands/5.jpg"], # "bad_hands/5_mask.jpg"],
1191
- ["bad_hands/6.jpg"], # "bad_hands/6_mask.jpg"],
1192
- ["bad_hands/7.jpg"], # "bad_hands/7_mask.jpg"],
1193
- # ["bad_hands/8.jpg"], # "bad_hands/8_mask.jpg"],
1194
- # ["bad_hands/9.jpg"], # "bad_hands/9_mask.jpg"],
1195
- # ["bad_hands/10.jpg"], # "bad_hands/10_mask.jpg"],
1196
- # ["bad_hands/11.jpg"], # "bad_hands/11_mask.jpg"],
1197
- # ["bad_hands/12.jpg"], # "bad_hands/12_mask.jpg"],
1198
- # ["bad_hands/13.jpg"], # "bad_hands/13_mask.jpg"],
1199
- # ["bad_hands/14.jpg"],
1200
- # ["bad_hands/15.jpg"],
1201
  ]
1202
  fix_example_brush = [
1203
- ["bad_hands/1_composite.png"],# "bad_hands/1.jpg"],
1204
- ["bad_hands/3_composite.png"],# "bad_hands/3.jpg"],
1205
- ["bad_hands/4_composite.png"],# "bad_hands/4.jpg"],
1206
- ["bad_hands/5_composite.png"],# "bad_hands/5.jpg"],
1207
- ["bad_hands/6_composite.png"],# "bad_hands/6.jpg"],
1208
- ["bad_hands/7_composite.png"],# "bad_hands/7.jpg"],
1209
- # ["bad_hands/14_mask.jpg"],
1210
- # ["bad_hands/15_mask.jpg"],
1211
  ]
1212
  fix_example_kpts = [
1213
  ["bad_hands/1_kpts.png", 3.0, 1224],
@@ -1217,9 +1132,20 @@ fix_example_kpts = [
1217
  ["bad_hands/6_kpts.png", 3.0, 1348],
1218
  ["bad_hands/7_kpts.png", 3.0, 42],
1219
  ]
 
 
 
 
 
 
 
 
1220
  for i in range(len(fix_example_kpts)):
1221
  npy_path = fix_example_kpts[i][0].replace("_kpts.png", ".npy")
1222
  fix_example_kpts[i].append(npy_path)
 
 
 
1223
 
1224
  custom_css = """
1225
  .gradio-container .examples img {
@@ -1248,6 +1174,18 @@ custom_css = """
1248
  #kpts_examples table tr td:nth-child(4) {
1249
  display: none !important;
1250
  }
 
 
 
 
 
 
 
 
 
 
 
 
1251
  #repose_tutorial video {
1252
  width: 70% !important;
1253
  display: block;
@@ -1256,10 +1194,35 @@ custom_css = """
1256
  }
1257
  """
1258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1259
  _HEADER_ = '''
1260
  <div style="text-align: center;">
1261
  <h1><b>FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation</b></h1>
1262
- <h2 style="color: #777777;">CVPR 2025 <span style="color: #990000; font-style: italic;">highlight</span></h2>
1263
  <style>
1264
  .link-spacing {
1265
  margin-right: 20px;
@@ -1280,8 +1243,7 @@ _HEADER_ = '''
1280
  <h3>
1281
  <a href='https://arxiv.org/abs/2412.02690' target='_blank' class="link-spacing">Paper</a>
1282
  <a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank' class="link-spacing">Project Page</a>
1283
- <a href='' target='_blank' class="link-spacing">Code</a>
1284
- <a href='' target='_blank'>Model Weights</a>
1285
  </h3>
1286
  <p>Below are two important abilities of our model. First, we can automatically <b>fix malformed hand images</b>, following the user-provided target hand pose and area to fix. Second, we can <b>repose hand</b> given two hand images - one is the image to edit, and the other one provides target hand pose.</p>
1287
  </div>
@@ -1323,21 +1285,23 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1323
  gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 1</p>""")
1324
  with gr.Row(variant="panel"):
1325
  with gr.Column():
1326
- gr.Video(
1327
- "how_to_videos/subtitled_fix_hands_custom.mp4",
1328
- label="Using your own image",
1329
- autoplay=True,
1330
- loop=True,
1331
- show_label=True,
1332
- )
 
1333
  with gr.Column():
1334
- gr.Video(
1335
- "how_to_videos/subtitled_fix_hands_example.mp4",
1336
- label="Using our example image",
1337
- autoplay=True,
1338
- loop=True,
1339
- show_label=True,
1340
- )
 
1341
 
1342
  # more options
1343
  with gr.Accordion(label="More options", open=False):
@@ -1392,20 +1356,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1392
  gr.Markdown(
1393
  """<p style="text-align: center;">Optionally crop the image.<br>(Click <b>top left</b> and <b>bottom right</b> of your desired bounding box around the hand)</p>"""
1394
  )
1395
- # fix_crop = gr.ImageEditor(
1396
- # type="numpy",
1397
- # sources=["upload", "webcam", "clipboard"],
1398
- # label="Image crop",
1399
- # show_label=True,
1400
- # height=LENGTH,
1401
- # width=LENGTH,
1402
- # layers=False,
1403
- # # crop_size="1:1",
1404
- # transforms=(),
1405
- # brush=False,
1406
- # image_mode="RGBA",
1407
- # container=False,
1408
- # )
1409
  fix_crop = gr.Image(
1410
  type="numpy",
1411
  sources=["upload", "webcam", "clipboard"],
@@ -1420,23 +1370,11 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1420
  gr.Markdown(
1421
  """<p style="text-align: center;">💡 If you crop, the model can focus on more details of the cropped area. Square crops might work better than rectangle crops.</p>"""
1422
  )
1423
- # fix_tmp = gr.Image(
1424
- # type="numpy",
1425
- # label="tmp",
1426
- # show_label=True,
1427
- # height=LENGTH,
1428
- # width=LENGTH,
1429
- # interactive=True,
1430
- # visible=True,
1431
- # sources=[],
1432
  # )
1433
- fix_example = gr.Examples(
1434
- fix_example_imgs,
1435
- inputs=[fix_crop],
1436
- examples_per_page=20,
1437
- # run_on_click=True,
1438
- # fn=load_brush,
1439
- )
1440
  with gr.Column():
1441
  gr.Markdown(
1442
  """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Brush wrong finger and its surrounding area</p>"""
@@ -1460,19 +1398,10 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1460
  container=False,
1461
  interactive=True,
1462
  )
1463
- fix_ex_brush = gr.Examples(
1464
- fix_example_brush,
1465
- inputs=[fix_ref],
1466
- # outputs=[fix_ref],
1467
- examples_per_page=20,
1468
- # run_on_click=True,
1469
- # fn=inpaint_from_example,
1470
- )
1471
- # gr.Markdown(
1472
- # """<p style="text-align: center;">&#9314; Hit the \"Finish Cropping & Brushing\" button</p>"""
1473
- # )
1474
- # fix_finish_crop = gr.Button(
1475
- # value="Finish Croping & Brushing", interactive=False
1476
  # )
1477
 
1478
  # keypoint selection
@@ -1485,8 +1414,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1485
  )
1486
  fix_kp_all = gr.Image(
1487
  type="numpy",
1488
- # label="Keypoints",
1489
- show_label=False,
1490
  height=LENGTH,
1491
  width=LENGTH,
1492
  interactive=False,
@@ -1494,14 +1423,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1494
  sources=(),
1495
  image_mode="RGBA"
1496
  )
1497
- with gr.Accordion(open=True):
1498
- fix_ex_kpts = gr.Examples(
1499
- fix_example_kpts,
1500
- inputs=[fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
1501
- examples_per_page=20,
1502
- postprocess=False,
1503
- elem_id="kpts_examples"
1504
- )
1505
  with gr.Accordion("[Custom data] Manually give hand pose", open=False):
1506
  gr.Markdown(
1507
  """<p style="text-align: center;">&#9312; Tell us if this is right, left, or both hands</p>"""
@@ -1515,10 +1444,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1515
  """<p style="text-align: center;">&#9313; Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
1516
  visible=False
1517
  )
1518
- # fix_kp_r_info = gr.Markdown(
1519
- # """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
1520
- # visible=False,
1521
- # )
1522
  fix_kp_right = gr.Image(
1523
  type="numpy",
1524
  label="Keypoint Selection (right hand)",
@@ -1569,21 +1494,11 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1569
  interactive=False,
1570
  )
1571
 
1572
- # get latent
1573
- # with gr.Column():
1574
-
1575
  # result column
1576
  with gr.Column():
1577
  gr.Markdown(
1578
  """<p style="text-align: center; font-size: 18px; font-weight: bold;">4. Press &quot;Run&quot; to get the corrected hand image 🎯</p>"""
1579
  )
1580
- # gr.Markdown(
1581
- # """<p style="text-align: center; font-size: 18px; font-weight: bold;">3. Press &quot;Ready&quot; to start pre-processing</p>"""
1582
- # )
1583
- # fix_ready = gr.Button(value="Ready", interactive=False)
1584
- # gr.Markdown(
1585
- # """<p style="text-align: center; font-weight: bold; ">Visualized (256, 256)-resized, brushed image</p>"""
1586
- # )
1587
  fix_vis_mask32 = gr.Image(
1588
  type="numpy",
1589
  label=f"Visualized {opts.latent_size} Inpaint Mask",
@@ -1603,9 +1518,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1603
  interactive=False,
1604
  visible=True,
1605
  )
1606
- # gr.Markdown(
1607
- # """<p style="text-align: center;">[NOTE] Above should be inpaint mask that you brushed, NOT the segmentation mask of the entire hand. </p>"""
1608
- # )
1609
  gr.Markdown(
1610
  """<p style="text-align: center;">⚠️ >3min and ~24GB per generation</p>"""
1611
  )
@@ -1645,7 +1557,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1645
  )
1646
  fix_clear = gr.ClearButton()
1647
 
1648
-
 
 
 
 
 
 
 
1649
  # listeners
1650
  fix_crop.change(stash_original, fix_crop, fix_original) # fix_original: (real_H, real_W, 3)
1651
  fix_crop.change(stay_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref])
@@ -1713,6 +1632,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1713
  reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
1714
  )
1715
  fix_kpts_path.change(read_kpts, fix_kpts_path, fix_kpts_np)
 
 
1716
  fix_run.click(
1717
  ready_sample,
1718
  [fix_ref, fix_inpaint_mask, fix_kpts, fix_kpts_np],
@@ -1820,13 +1741,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
1820
  gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 2</p>""")
1821
  with gr.Row(variant="panel", elem_id="repose_tutorial"):
1822
  with gr.Column():
1823
- gr.Video(
1824
- "how_to_videos/subtitled_repose_hands.mp4",
1825
- label="Tutorial",
1826
- autoplay=True,
1827
- loop=True,
1828
- show_label=True,
1829
- )
 
1830
 
1831
  # main tabs
1832
  with gr.Row():
 
473
  img = ref["background"][..., :3]
474
 
475
  # viualization
 
476
  mask = inpainted < 128
 
 
 
 
477
  img = mask_image(img, mask)
478
+ if inpaint_mask.sum() == 0:
479
+ gr.Warning("Run botton not enabled? Please try again.", duration=10)
480
  return img, inpaint_mask
481
 
482
  def get_kps(img, keypoints, side: Literal["right", "left"], evt: gr.SelectData):
 
578
  cropped_vis[:,:,-1] = alpha
579
  else:
580
  gr.Error("Something is wrong", duration=3)
 
 
 
 
 
581
  return crop_coord, cropped, cropped_vis
582
 
583
  def disable_crop(crop_coord):
 
645
  keypts[0] = np.zeros((21, 2))
646
  elif len(keypts[0]) == 21:
647
  keypts[0] = np.array(keypts[0], dtype=np.float32)
 
 
648
  else:
649
  gr.Info("Number of right hand keypoints should be either 0 or 21.")
650
  return None, None
 
652
  keypts[1] = np.zeros((21, 2))
653
  elif len(keypts[1]) == 21:
654
  keypts[1] = np.array(keypts[1], dtype=np.float32)
 
 
655
  else:
656
  gr.Info("Number of left hand keypoints should be either 0 or 21.")
657
  return None, None
 
856
  return
857
  img["composite"] = img["composite"][:, ::-1, :]
858
  img["background"] = img["background"][:, ::-1, :]
859
+ img["layers"] = [layer[:, ::-1, :] for layer in img["layers"]]
 
 
 
 
 
 
860
  if img_raw is not None:
861
  img_raw = img_raw[:, ::-1, :]
862
  pose_img = pose_img[:, ::-1, :]
 
871
  auto_cond = auto_cond.flip(-1)
872
  if manual_cond is not None:
873
  manual_cond = manual_cond.flip(-1)
 
 
 
 
 
 
874
  if keypts is not None:
875
  if keypts[:21, :].sum() != 0:
876
  keypts[:21, 0] = opts.image_size[1] - keypts[:21, 0]
 
956
  def enable_component(image1, image2):
957
  if image1 is None or image2 is None:
958
  return gr.update(interactive=False)
959
+ if isinstance(image1, np.ndarray) and image1.sum() == 0:
960
+ return gr.update(interactive=False)
961
+ if isinstance(image2, np.ndarray) and image2.sum() == 0:
962
+ return gr.update(interactive=False)
963
  if isinstance(image1, dict) and "background" in image1 and "layers" in image1 and "composite" in image1:
964
  if image1["background"] is None or (
965
  image1["background"].sum() == 0
 
1089
  [
1090
  "sample_images/sample4.jpg",
1091
  ],
 
 
 
1092
  [
1093
  "sample_images/sample6.jpg",
1094
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1095
  ]
1096
  example_target_imgs = [
 
 
 
 
 
 
 
 
 
 
 
 
1097
  [
1098
  "sample_images/sample5.jpg",
1099
  ],
 
 
 
 
 
 
 
 
 
1100
  [
1101
  "sample_images/sample9.jpg",
1102
  ],
 
1107
  "sample_images/sample11.jpg",
1108
  ],
1109
  ["pose_images/pose1.jpg"],
 
 
 
 
 
 
 
1110
  ]
1111
  fix_example_imgs = [
1112
+ ["bad_hands/1.jpg"],
1113
+ ["bad_hands/3.jpg"],
1114
+ ["bad_hands/4.jpg"],
1115
+ ["bad_hands/5.jpg"],
1116
+ ["bad_hands/6.jpg"],
1117
+ ["bad_hands/7.jpg"],
 
 
 
 
 
 
 
 
 
1118
  ]
1119
  fix_example_brush = [
1120
+ ["bad_hands/1_composite.png"],
1121
+ ["bad_hands/3_composite.png"],
1122
+ ["bad_hands/4_composite.png"],
1123
+ ["bad_hands/5_composite.png"],
1124
+ ["bad_hands/6_composite.png"],
1125
+ ["bad_hands/7_composite.png"],
 
 
1126
  ]
1127
  fix_example_kpts = [
1128
  ["bad_hands/1_kpts.png", 3.0, 1224],
 
1132
  ["bad_hands/6_kpts.png", 3.0, 1348],
1133
  ["bad_hands/7_kpts.png", 3.0, 42],
1134
  ]
1135
+ fix_example_all = [
1136
+ ["bad_hands/1.jpg", "bad_hands/1_composite.png", "bad_hands/1_kpts.png", 3.0, 1224],
1137
+ ["bad_hands/3.jpg", "bad_hands/3_composite.png", "bad_hands/3_kpts.png", 1.0, 42],
1138
+ ["bad_hands/4.jpg", "bad_hands/4_composite.png", "bad_hands/4_kpts.png", 2.0, 42],
1139
+ ["bad_hands/5.jpg", "bad_hands/5_composite.png", "bad_hands/5_kpts.png", 3.0, 42],
1140
+ ["bad_hands/6.jpg", "bad_hands/6_composite.png", "bad_hands/6_kpts.png", 3.0, 1348],
1141
+ ["bad_hands/7.jpg", "bad_hands/7_composite.png", "bad_hands/7_kpts.png", 3.0, 42],
1142
+ ]
1143
  for i in range(len(fix_example_kpts)):
1144
  npy_path = fix_example_kpts[i][0].replace("_kpts.png", ".npy")
1145
  fix_example_kpts[i].append(npy_path)
1146
+ for i in range(len(fix_example_all)):
1147
+ npy_path = fix_example_all[i][2].replace("_kpts.png", ".npy")
1148
+ fix_example_all[i].append(npy_path)
1149
 
1150
  custom_css = """
1151
  .gradio-container .examples img {
 
1174
  #kpts_examples table tr td:nth-child(4) {
1175
  display: none !important;
1176
  }
1177
+ #fix_examples_all table tr th:nth-child(4),
1178
+ #fix_examples_all table tr td:nth-child(4) {
1179
+ display: none !important;
1180
+ }
1181
+ #fix_examples_all table tr th:nth-child(5),
1182
+ #fix_examples_all table tr td:nth-child(5) {
1183
+ display: none !important;
1184
+ }
1185
+ #fix_examples_all table tr th:nth-child(6),
1186
+ #fix_examples_all table tr td:nth-child(6) {
1187
+ display: none !important;
1188
+ }
1189
  #repose_tutorial video {
1190
  width: 70% !important;
1191
  display: block;
 
1194
  }
1195
  """
1196
 
1197
+ tut1_custom = f"""
1198
+ <iframe style="width:100%; aspect-ratio: 12/9;"
1199
+ src="https://www.youtube.com/embed/fQk7cOjSCVc"
1200
+ title="Using your own image" frameborder="0"
1201
+ allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
1202
+ allowfullscreen>
1203
+ </iframe>
1204
+ """
1205
+ tut1_example = f"""
1206
+ <iframe style="width:100%; aspect-ratio: 12/9;"
1207
+ src="https://www.youtube.com/embed/-Dq0XTYwTHA"
1208
+ title="Using your own image" frameborder="0"
1209
+ allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
1210
+ allowfullscreen>
1211
+ </iframe>
1212
+ """
1213
+ tut2_example = f"""
1214
+ <iframe style="width:50%; aspect-ratio: 12/9; display:block; margin-left:auto; margin-right:auto;"
1215
+ src="https://www.youtube.com/embed/y2CbzUG2uM0"
1216
+ title="Using your own image" frameborder="0"
1217
+ allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
1218
+ allowfullscreen>
1219
+ </iframe>
1220
+ """
1221
+
1222
  _HEADER_ = '''
1223
  <div style="text-align: center;">
1224
  <h1><b>FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation</b></h1>
1225
+ <h2 style="color: #777777;">CVPR 2025 <span style="color: #990000; font-style: italic;">Highlight</span></h2>
1226
  <style>
1227
  .link-spacing {
1228
  margin-right: 20px;
 
1243
  <h3>
1244
  <a href='https://arxiv.org/abs/2412.02690' target='_blank' class="link-spacing">Paper</a>
1245
  <a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank' class="link-spacing">Project Page</a>
1246
+ <a href='' target='_blank' class="link-spacing">Code (Coming in June)</a>
 
1247
  </h3>
1248
  <p>Below are two important abilities of our model. First, we can automatically <b>fix malformed hand images</b>, following the user-provided target hand pose and area to fix. Second, we can <b>repose hand</b> given two hand images - one is the image to edit, and the other one provides target hand pose.</p>
1249
  </div>
 
1285
  gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 1</p>""")
1286
  with gr.Row(variant="panel"):
1287
  with gr.Column():
1288
+ # gr.Video(
1289
+ # "how_to_videos/subtitled_fix_hands_custom.mp4",
1290
+ # label="Using your own image",
1291
+ # autoplay=True,
1292
+ # loop=True,
1293
+ # show_label=True,
1294
+ # )
1295
+ gr.HTML(tut1_custom)
1296
  with gr.Column():
1297
+ # gr.Video(
1298
+ # "how_to_videos/subtitled_fix_hands_example.mp4",
1299
+ # label="Using our example image",
1300
+ # autoplay=True,
1301
+ # loop=True,
1302
+ # show_label=True,
1303
+ # )
1304
+ gr.HTML(tut1_example)
1305
 
1306
  # more options
1307
  with gr.Accordion(label="More options", open=False):
 
1356
  gr.Markdown(
1357
  """<p style="text-align: center;">Optionally crop the image.<br>(Click <b>top left</b> and <b>bottom right</b> of your desired bounding box around the hand)</p>"""
1358
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1359
  fix_crop = gr.Image(
1360
  type="numpy",
1361
  sources=["upload", "webcam", "clipboard"],
 
1370
  gr.Markdown(
1371
  """<p style="text-align: center;">💡 If you crop, the model can focus on more details of the cropped area. Square crops might work better than rectangle crops.</p>"""
1372
  )
1373
+ # fix_example = gr.Examples(
1374
+ # fix_example_imgs,
1375
+ # inputs=[fix_crop],
1376
+ # examples_per_page=20,
 
 
 
 
 
1377
  # )
 
 
 
 
 
 
 
1378
  with gr.Column():
1379
  gr.Markdown(
1380
  """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Brush wrong finger and its surrounding area</p>"""
 
1398
  container=False,
1399
  interactive=True,
1400
  )
1401
+ # fix_ex_brush = gr.Examples(
1402
+ # fix_example_brush,
1403
+ # inputs=[fix_ref],
1404
+ # examples_per_page=20,
 
 
 
 
 
 
 
 
 
1405
  # )
1406
 
1407
  # keypoint selection
 
1414
  )
1415
  fix_kp_all = gr.Image(
1416
  type="numpy",
1417
+ label="Target Hand Pose",
1418
+ show_label=True,
1419
  height=LENGTH,
1420
  width=LENGTH,
1421
  interactive=False,
 
1423
  sources=(),
1424
  image_mode="RGBA"
1425
  )
1426
+ # with gr.Accordion(open=True):
1427
+ # fix_ex_kpts = gr.Examples(
1428
+ # fix_example_kpts,
1429
+ # inputs=[fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
1430
+ # examples_per_page=20,
1431
+ # postprocess=False,
1432
+ # elem_id="kpts_examples"
1433
+ # )
1434
  with gr.Accordion("[Custom data] Manually give hand pose", open=False):
1435
  gr.Markdown(
1436
  """<p style="text-align: center;">&#9312; Tell us if this is right, left, or both hands</p>"""
 
1444
  """<p style="text-align: center;">&#9313; Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
1445
  visible=False
1446
  )
 
 
 
 
1447
  fix_kp_right = gr.Image(
1448
  type="numpy",
1449
  label="Keypoint Selection (right hand)",
 
1494
  interactive=False,
1495
  )
1496
 
 
 
 
1497
  # result column
1498
  with gr.Column():
1499
  gr.Markdown(
1500
  """<p style="text-align: center; font-size: 18px; font-weight: bold;">4. Press &quot;Run&quot; to get the corrected hand image 🎯</p>"""
1501
  )
 
 
 
 
 
 
 
1502
  fix_vis_mask32 = gr.Image(
1503
  type="numpy",
1504
  label=f"Visualized {opts.latent_size} Inpaint Mask",
 
1518
  interactive=False,
1519
  visible=True,
1520
  )
 
 
 
1521
  gr.Markdown(
1522
  """<p style="text-align: center;">⚠️ >3min and ~24GB per generation</p>"""
1523
  )
 
1557
  )
1558
  fix_clear = gr.ClearButton()
1559
 
1560
+ gr.Examples(
1561
+ fix_example_all,
1562
+ inputs=[fix_crop, fix_ref, fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
1563
+ examples_per_page=20,
1564
+ postprocess=False,
1565
+ elem_id="fix_examples_all",
1566
+ )
1567
+
1568
  # listeners
1569
  fix_crop.change(stash_original, fix_crop, fix_original) # fix_original: (real_H, real_W, 3)
1570
  fix_crop.change(stay_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref])
 
1632
  reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
1633
  )
1634
  fix_kpts_path.change(read_kpts, fix_kpts_path, fix_kpts_np)
1635
+ fix_inpaint_mask.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
1636
+ fix_kpts_np.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
1637
  fix_run.click(
1638
  ready_sample,
1639
  [fix_ref, fix_inpaint_mask, fix_kpts, fix_kpts_np],
 
1741
  gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 2</p>""")
1742
  with gr.Row(variant="panel", elem_id="repose_tutorial"):
1743
  with gr.Column():
1744
+ # gr.Video(
1745
+ # "how_to_videos/subtitled_repose_hands.mp4",
1746
+ # label="Tutorial",
1747
+ # autoplay=True,
1748
+ # loop=True,
1749
+ # show_label=True,
1750
+ # )
1751
+ gr.HTML(tut2_example)
1752
 
1753
  # main tabs
1754
  with gr.Row():