Space: Runtime error

Commit 33b5165 · 1 parent: 3ceda9e
code cleanup, fixhand examples autoload, change to youtube

Files changed:
- .gitignore: +1 -1
- app.py: +121 -199
.gitignore CHANGED
@@ -1,4 +1,4 @@
-
+.vscode/
 sbatch/err/
 sbatch/out/
 __pycache__/
app.py CHANGED
@@ -473,13 +473,10 @@ def visualize_ref(ref):
     img = ref["background"][..., :3]
 
     # viualization
-    # img = cv2.resize(img, inpainted.shape[::-1], interpolation=cv2.INTER_AREA)
     mask = inpainted < 128
-    # img = img.astype(np.int32)
-    # img[mask, :] = img[mask, :] - 50
-    # img[np.any(img<0, axis=-1)]=0
-    # img = img.astype(np.uint8)
     img = mask_image(img, mask)
+    if inpaint_mask.sum() == 0:
+        gr.Warning("Run botton not enabled? Please try again.", duration=10)
     return img, inpaint_mask
 
 def get_kps(img, keypoints, side: Literal["right", "left"], evt: gr.SelectData):
@@ -581,11 +578,6 @@ def process_crop(img, crop_coord, evt:gr.SelectData):
         cropped_vis[:,:,-1] = alpha
     else:
         gr.Error("Something is wrong", duration=3)
-    # cropped_editor = {
-    #     "background": cropped,
-    #     "composite": cropped,
-    #     "layers": [cropped_vis]
-    # }
     return crop_coord, cropped, cropped_vis
 
 def disable_crop(crop_coord):
@@ -653,8 +645,6 @@ def ready_sample(img_cropped, inpaint_mask, keypts, keypts_np):
         keypts[0] = np.zeros((21, 2))
     elif len(keypts[0]) == 21:
         keypts[0] = np.array(keypts[0], dtype=np.float32)
-        # keypts[0][:, 0] = keypts[0][:, 0] + crop_coord[0][0]
-        # keypts[0][:, 1] = keypts[0][:, 1] + crop_coord[0][1]
     else:
         gr.Info("Number of right hand keypoints should be either 0 or 21.")
         return None, None
@@ -662,8 +652,6 @@ def ready_sample(img_cropped, inpaint_mask, keypts, keypts_np):
         keypts[1] = np.zeros((21, 2))
     elif len(keypts[1]) == 21:
         keypts[1] = np.array(keypts[1], dtype=np.float32)
-        # keypts[1][:, 0] = keypts[1][:, 0] + crop_coord[0][0]
-        # keypts[1][:, 1] = keypts[1][:, 1] + crop_coord[0][1]
     else:
         gr.Info("Number of left hand keypoints should be either 0 or 21.")
         return None, None
@@ -868,13 +856,7 @@ def flip_hand(
         return
     img["composite"] = img["composite"][:, ::-1, :]
     img["background"] = img["background"][:, ::-1, :]
-    img["layers"] = [layer[:, ::-1, :] for layer in img["layers"]]
-    # for comp in [pose_img, pose_manual_img, manual_kp_right, manual_kp_left, cond, auto_cond, manual_cond]:
-    #     if comp is not None:
-    #         if isinstance(comp, torch.Tensor):
-    #             comp = comp.flip(-1)
-    #         else:
-    #             comp = comp[:, ::-1, :]
+    img["layers"] = [layer[:, ::-1, :] for layer in img["layers"]]
     if img_raw is not None:
         img_raw = img_raw[:, ::-1, :]
     pose_img = pose_img[:, ::-1, :]
@@ -889,12 +871,6 @@ def flip_hand(
         auto_cond = auto_cond.flip(-1)
     if manual_cond is not None:
         manual_cond = manual_cond.flip(-1)
-    # for comp in [keypts, auto_keypts, manual_keypts]:
-    #     if comp is not None:
-    #         if comp[:21, :].sum() != 0:
-    #             comp[:21, 0] = opts.image_size[1] - comp[:21, 0]
-    #         if comp[21:, :].sum() != 0:
-    #             comp[21:, 0] = opts.image_size[1] - comp[21:, 0]
     if keypts is not None:
         if keypts[:21, :].sum() != 0:
             keypts[:21, 0] = opts.image_size[1] - keypts[:21, 0]
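The surviving branches above implement the horizontal flip directly: reverse the image's width axis, then mirror keypoint x-coordinates about the image width. A minimal standalone sketch of the same transform (a hypothetical helper, not part of app.py), assuming a (42, 2) array holding right-hand then left-hand keypoints:

import numpy as np

def flip_horizontal(img: np.ndarray, keypts: np.ndarray, width: int):
    """Mirror an HxWxC image and its (42, 2) keypoint array left-right."""
    img = img[:, ::-1, :]                      # reverse the width axis
    keypts = keypts.copy()
    for sl in (slice(0, 21), slice(21, 42)):   # right hand, then left hand
        if keypts[sl].sum() != 0:              # an all-zero block means "hand absent"
            keypts[sl, 0] = width - keypts[sl, 0]  # mirror x about the width
    return img, keypts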
@@ -980,6 +956,10 @@ def fix_clear_all():
 def enable_component(image1, image2):
     if image1 is None or image2 is None:
         return gr.update(interactive=False)
+    if isinstance(image1, np.ndarray) and image1.sum() == 0:
+        return gr.update(interactive=False)
+    if isinstance(image2, np.ndarray) and image2.sum() == 0:
+        return gr.update(interactive=False)
     if isinstance(image1, dict) and "background" in image1 and "layers" in image1 and "composite" in image1:
         if image1["background"] is None or (
             image1["background"].sum() == 0
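The new array checks pair with the two `.change` listeners added near the end of this diff: whenever the inpaint mask or the loaded keypoints change, `enable_component` re-runs and toggles the Run button. A condensed sketch of the gating pattern, with hypothetical component names rather than the app's full layout:

import gradio as gr
import numpy as np

def enable_component(image1, image2):
    # Keep the button disabled until both inputs carry non-empty data.
    if image1 is None or image2 is None:
        return gr.update(interactive=False)
    if isinstance(image1, np.ndarray) and image1.sum() == 0:
        return gr.update(interactive=False)
    if isinstance(image2, np.ndarray) and image2.sum() == 0:
        return gr.update(interactive=False)
    return gr.update(interactive=True)

with gr.Blocks() as demo:
    mask = gr.Image(type="numpy")
    kpts = gr.Image(type="numpy")
    run = gr.Button("Run", interactive=False)
    # Re-evaluate the gate whenever either input changes.
    mask.change(enable_component, [mask, kpts], run)
    kpts.change(enable_component, [mask, kpts], run)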
@@ -1109,61 +1089,14 @@ example_ref_imgs = [
     [
         "sample_images/sample4.jpg",
     ],
-    # [
-    #     "sample_images/sample5.jpg",
-    # ],
     [
         "sample_images/sample6.jpg",
     ],
-    # [
-    #     "sample_images/sample7.jpg",
-    # ],
-    # [
-    #     "sample_images/sample8.jpg",
-    # ],
-    # [
-    #     "sample_images/sample9.jpg",
-    # ],
-    # [
-    #     "sample_images/sample10.jpg",
-    # ],
-    # [
-    #     "sample_images/sample11.jpg",
-    # ],
-    # ["pose_images/pose1.jpg"],
-    # ["pose_images/pose2.jpg"],
-    # ["pose_images/pose3.jpg"],
-    # ["pose_images/pose4.jpg"],
-    # ["pose_images/pose5.jpg"],
-    # ["pose_images/pose6.jpg"],
-    # ["pose_images/pose7.jpg"],
-    # ["pose_images/pose8.jpg"],
 ]
 example_target_imgs = [
-    # [
-    #     "sample_images/sample1.jpg",
-    # ],
-    # [
-    #     "sample_images/sample2.jpg",
-    # ],
-    # [
-    #     "sample_images/sample3.jpg",
-    # ],
-    # [
-    #     "sample_images/sample4.jpg",
-    # ],
     [
         "sample_images/sample5.jpg",
     ],
-    # [
-    #     "sample_images/sample6.jpg",
-    # ],
-    # [
-    #     "sample_images/sample7.jpg",
-    # ],
-    # [
-    #     "sample_images/sample8.jpg",
-    # ],
     [
         "sample_images/sample9.jpg",
     ],
@@ -1174,40 +1107,22 @@ example_target_imgs = [
         "sample_images/sample11.jpg",
     ],
     ["pose_images/pose1.jpg"],
-    # ["pose_images/pose2.jpg"],
-    # ["pose_images/pose3.jpg"],
-    # ["pose_images/pose4.jpg"],
-    # ["pose_images/pose5.jpg"],
-    # ["pose_images/pose6.jpg"],
-    # ["pose_images/pose7.jpg"],
-    # ["pose_images/pose8.jpg"],
 ]
 fix_example_imgs = [
-    ["bad_hands/1.jpg"],
-
-    ["bad_hands/3.jpg"], # "bad_hands/3_mask.jpg"],
-    ["bad_hands/4.jpg"], # "bad_hands/4_mask.jpg"],
-    ["bad_hands/5.jpg"], # "bad_hands/5_mask.jpg"],
-    ["bad_hands/6.jpg"], # "bad_hands/6_mask.jpg"],
-    ["bad_hands/7.jpg"], # "bad_hands/7_mask.jpg"],
-    # ["bad_hands/8.jpg"], # "bad_hands/8_mask.jpg"],
-    # ["bad_hands/9.jpg"], # "bad_hands/9_mask.jpg"],
-    # ["bad_hands/10.jpg"], # "bad_hands/10_mask.jpg"],
-    # ["bad_hands/11.jpg"], # "bad_hands/11_mask.jpg"],
-    # ["bad_hands/12.jpg"], # "bad_hands/12_mask.jpg"],
-    # ["bad_hands/13.jpg"], # "bad_hands/13_mask.jpg"],
-    # ["bad_hands/14.jpg"],
-    # ["bad_hands/15.jpg"],
+    ["bad_hands/1.jpg"],
+    ["bad_hands/3.jpg"],
+    ["bad_hands/4.jpg"],
+    ["bad_hands/5.jpg"],
+    ["bad_hands/6.jpg"],
+    ["bad_hands/7.jpg"],
 ]
 fix_example_brush = [
-    ["bad_hands/1_composite.png"]
-    ["bad_hands/3_composite.png"]
-    ["bad_hands/4_composite.png"]
-    ["bad_hands/5_composite.png"]
-    ["bad_hands/6_composite.png"]
-    ["bad_hands/7_composite.png"]
-    # ["bad_hands/14_mask.jpg"],
-    # ["bad_hands/15_mask.jpg"],
+    ["bad_hands/1_composite.png"],
+    ["bad_hands/3_composite.png"],
+    ["bad_hands/4_composite.png"],
+    ["bad_hands/5_composite.png"],
+    ["bad_hands/6_composite.png"],
+    ["bad_hands/7_composite.png"],
 ]
 fix_example_kpts = [
     ["bad_hands/1_kpts.png", 3.0, 1224],
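Note what the added commas in `fix_example_brush` fix: the old entries were adjacent subscripts, so the second bracketed literal indexes the first list with a string and the module raises at import time. For example:

>>> ["bad_hands/1_composite.png"]["bad_hands/3_composite.png"]
Traceback (most recent call last):
  ...
TypeError: list indices must be integers or slices, not str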
@@ -1217,9 +1132,20 @@ fix_example_kpts = [
     ["bad_hands/6_kpts.png", 3.0, 1348],
     ["bad_hands/7_kpts.png", 3.0, 42],
 ]
+fix_example_all = [
+    ["bad_hands/1.jpg", "bad_hands/1_composite.png", "bad_hands/1_kpts.png", 3.0, 1224],
+    ["bad_hands/3.jpg", "bad_hands/3_composite.png", "bad_hands/3_kpts.png", 1.0, 42],
+    ["bad_hands/4.jpg", "bad_hands/4_composite.png", "bad_hands/4_kpts.png", 2.0, 42],
+    ["bad_hands/5.jpg", "bad_hands/5_composite.png", "bad_hands/5_kpts.png", 3.0, 42],
+    ["bad_hands/6.jpg", "bad_hands/6_composite.png", "bad_hands/6_kpts.png", 3.0, 1348],
+    ["bad_hands/7.jpg", "bad_hands/7_composite.png", "bad_hands/7_kpts.png", 3.0, 42],
+]
 for i in range(len(fix_example_kpts)):
     npy_path = fix_example_kpts[i][0].replace("_kpts.png", ".npy")
     fix_example_kpts[i].append(npy_path)
+for i in range(len(fix_example_all)):
+    npy_path = fix_example_all[i][2].replace("_kpts.png", ".npy")
+    fix_example_all[i].append(npy_path)
 
 custom_css = """
 .gradio-container .examples img {
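Each `fix_example_all` row bundles one value per input that a single example click should populate (image, brushed composite, keypoint image, cfg scale, seed), and the loop derives the keypoints `.npy` path from the `_kpts.png` filename and appends it as a sixth value. A sketch of what one processed row looks like:

row = ["bad_hands/1.jpg", "bad_hands/1_composite.png", "bad_hands/1_kpts.png", 3.0, 1224]
npy_path = row[2].replace("_kpts.png", ".npy")   # -> "bad_hands/1.npy"
row.append(npy_path)
# row now lines up with inputs=[fix_crop, fix_ref, fix_kp_all, fix_cfg, fix_seed, fix_kpts_path]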
@@ -1248,6 +1174,18 @@ custom_css = """
 #kpts_examples table tr td:nth-child(4) {
     display: none !important;
 }
+#fix_examples_all table tr th:nth-child(4),
+#fix_examples_all table tr td:nth-child(4) {
+    display: none !important;
+}
+#fix_examples_all table tr th:nth-child(5),
+#fix_examples_all table tr td:nth-child(5) {
+    display: none !important;
+}
+#fix_examples_all table tr th:nth-child(6),
+#fix_examples_all table tr td:nth-child(6) {
+    display: none !important;
+}
 #repose_tutorial video {
     width: 70% !important;
     display: block;
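These new rules hide columns 4-6 of the combined examples table (cfg scale, seed, and the appended `.npy` path), so the gallery shows only the three image columns; the selectors target the `elem_id="fix_examples_all"` given to the `gr.Examples` added later in this diff. A minimal sketch of the pattern, with hypothetical names:

import gradio as gr

css = """
#my_examples table tr th:nth-child(2),
#my_examples table tr td:nth-child(2) {
    display: none !important;  /* hide the non-image column from the table */
}
"""

with gr.Blocks(css=css) as demo:
    img = gr.Image(type="filepath")
    seed = gr.Number()
    gr.Examples([["cat.jpg", 42]], inputs=[img, seed], elem_id="my_examples")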
@@ -1256,10 +1194,35 @@ custom_css = """
 }
 """
 
+tut1_custom = f"""
+<iframe style="width:100%; aspect-ratio: 12/9;"
+    src="https://www.youtube.com/embed/fQk7cOjSCVc"
+    title="Using your own image" frameborder="0"
+    allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+    allowfullscreen>
+</iframe>
+"""
+tut1_example = f"""
+<iframe style="width:100%; aspect-ratio: 12/9;"
+    src="https://www.youtube.com/embed/-Dq0XTYwTHA"
+    title="Using your own image" frameborder="0"
+    allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+    allowfullscreen>
+</iframe>
+"""
+tut2_example = f"""
+<iframe style="width:50%; aspect-ratio: 12/9; display:block; margin-left:auto; margin-right:auto;"
+    src="https://www.youtube.com/embed/y2CbzUG2uM0"
+    title="Using your own image" frameborder="0"
+    allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+    allowfullscreen>
+</iframe>
+"""
+
 _HEADER_ = '''
 <div style="text-align: center;">
     <h1><b>FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation</b></h1>
-    <h2 style="color: #777777;">CVPR 2025 <span style="color: #990000; font-style: italic;">
+    <h2 style="color: #777777;">CVPR 2025 <span style="color: #990000; font-style: italic;">Highlight</span></h2>
     <style>
     .link-spacing {
         margin-right: 20px;
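This is the "change to youtube" part of the commit: bundled `gr.Video` files are replaced by YouTube embeds rendered through `gr.HTML` further down. (Two small nits the diff keeps: the `f` prefixes are unnecessary since the strings contain no placeholders, and the `title="Using your own image"` attribute was copied onto all three embeds.) The pattern in isolation, a minimal sketch:

import gradio as gr

embed = """
<iframe style="width:100%; aspect-ratio: 12/9;"
    src="https://www.youtube.com/embed/fQk7cOjSCVc"
    title="Tutorial video" frameborder="0"
    allow="autoplay; encrypted-media; picture-in-picture" allowfullscreen>
</iframe>
"""

with gr.Blocks() as demo:
    gr.HTML(embed)  # renders the hosted video where a local gr.Video used to be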
@@ -1280,8 +1243,7 @@ _HEADER_ = '''
     <h3>
         <a href='https://arxiv.org/abs/2412.02690' target='_blank' class="link-spacing">Paper</a>
         <a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank' class="link-spacing">Project Page</a>
-        <a href='' target='_blank' class="link-spacing">Code</a>
-        <a href='' target='_blank'>Model Weights</a>
+        <a href='' target='_blank' class="link-spacing">Code (Coming in June)</a>
     </h3>
     <p>Below are two important abilities of our model. First, we can automatically <b>fix malformed hand images</b>, following the user-provided target hand pose and area to fix. Second, we can <b>repose hand</b> given two hand images - one is the image to edit, and the other one provides target hand pose.</p>
 </div>
@@ -1323,21 +1285,23 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
     gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 1</p>""")
     with gr.Row(variant="panel"):
         with gr.Column():
-            gr.Video(
-                "how_to_videos/subtitled_fix_hands_custom.mp4",
-                label="Using your own image",
-                autoplay=True,
-                loop=True,
-                show_label=True,
-            )
+            # gr.Video(
+            #     "how_to_videos/subtitled_fix_hands_custom.mp4",
+            #     label="Using your own image",
+            #     autoplay=True,
+            #     loop=True,
+            #     show_label=True,
+            # )
+            gr.HTML(tut1_custom)
         with gr.Column():
-            gr.Video(
-                "how_to_videos/subtitled_fix_hands_example.mp4",
-                label="Using our example image",
-                autoplay=True,
-                loop=True,
-                show_label=True,
-            )
+            # gr.Video(
+            #     "how_to_videos/subtitled_fix_hands_example.mp4",
+            #     label="Using our example image",
+            #     autoplay=True,
+            #     loop=True,
+            #     show_label=True,
+            # )
+            gr.HTML(tut1_example)
 
     # more options
     with gr.Accordion(label="More options", open=False):
@@ -1392,20 +1356,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 gr.Markdown(
                     """<p style="text-align: center;">Optionally crop the image.<br>(Click <b>top left</b> and <b>bottom right</b> of your desired bounding box around the hand)</p>"""
                 )
-                # fix_crop = gr.ImageEditor(
-                #     type="numpy",
-                #     sources=["upload", "webcam", "clipboard"],
-                #     label="Image crop",
-                #     show_label=True,
-                #     height=LENGTH,
-                #     width=LENGTH,
-                #     layers=False,
-                #     # crop_size="1:1",
-                #     transforms=(),
-                #     brush=False,
-                #     image_mode="RGBA",
-                #     container=False,
-                # )
                 fix_crop = gr.Image(
                     type="numpy",
                     sources=["upload", "webcam", "clipboard"],
@@ -1420,23 +1370,11 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 gr.Markdown(
                     """<p style="text-align: center;">💡 If you crop, the model can focus on more details of the cropped area. Square crops might work better than rectangle crops.</p>"""
                 )
-                #
-                #
-                #
-                #
-                #     height=LENGTH,
-                #     width=LENGTH,
-                #     interactive=True,
-                #     visible=True,
-                #     sources=[],
+                # fix_example = gr.Examples(
+                #     fix_example_imgs,
+                #     inputs=[fix_crop],
+                #     examples_per_page=20,
                 # )
-                fix_example = gr.Examples(
-                    fix_example_imgs,
-                    inputs=[fix_crop],
-                    examples_per_page=20,
-                    # run_on_click=True,
-                    # fn=load_brush,
-                )
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Brush wrong finger and its surrounding area</p>"""
@@ -1460,19 +1398,10 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     container=False,
                     interactive=True,
                 )
-                fix_ex_brush = gr.Examples(
-                    fix_example_brush,
-                    inputs=[fix_ref],
-
-                    examples_per_page=20,
-                    # run_on_click=True,
-                    # fn=inpaint_from_example,
-                )
-                # gr.Markdown(
-                #     """<p style="text-align: center;">③ Hit the \"Finish Cropping & Brushing\" button</p>"""
-                # )
-                # fix_finish_crop = gr.Button(
-                #     value="Finish Croping & Brushing", interactive=False
+                # fix_ex_brush = gr.Examples(
+                #     fix_example_brush,
+                #     inputs=[fix_ref],
+                #     examples_per_page=20,
                 # )
 
                 # keypoint selection
@@ -1485,8 +1414,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 )
                 fix_kp_all = gr.Image(
                     type="numpy",
-
-                    show_label=
+                    label="Target Hand Pose",
+                    show_label=True,
                     height=LENGTH,
                     width=LENGTH,
                     interactive=False,
@@ -1494,14 +1423,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     sources=(),
                     image_mode="RGBA"
                 )
-                with gr.Accordion(open=True):
-                    fix_ex_kpts = gr.Examples(
-                        fix_example_kpts,
-                        inputs=[fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
-                        examples_per_page=20,
-                        postprocess=False,
-                        elem_id="kpts_examples"
-                    )
+                # with gr.Accordion(open=True):
+                #     fix_ex_kpts = gr.Examples(
+                #         fix_example_kpts,
+                #         inputs=[fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
+                #         examples_per_page=20,
+                #         postprocess=False,
+                #         elem_id="kpts_examples"
+                #     )
                 with gr.Accordion("[Custom data] Manually give hand pose", open=False):
                     gr.Markdown(
                         """<p style="text-align: center;">① Tell us if this is right, left, or both hands</p>"""
@@ -1515,10 +1444,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                         """<p style="text-align: center;">② Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
                         visible=False
                     )
-                    # fix_kp_r_info = gr.Markdown(
-                    #     """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
-                    #     visible=False,
-                    # )
                     fix_kp_right = gr.Image(
                         type="numpy",
                         label="Keypoint Selection (right hand)",
@@ -1569,21 +1494,11 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                         interactive=False,
                     )
 
-            # get latent
-            # with gr.Column():
-
             # result column
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 18px; font-weight: bold;">4. Press "Run" to get the corrected hand image 🎯</p>"""
                 )
-                # gr.Markdown(
-                #     """<p style="text-align: center; font-size: 18px; font-weight: bold;">3. Press "Ready" to start pre-processing</p>"""
-                # )
-                # fix_ready = gr.Button(value="Ready", interactive=False)
-                # gr.Markdown(
-                #     """<p style="text-align: center; font-weight: bold; ">Visualized (256, 256)-resized, brushed image</p>"""
-                # )
                 fix_vis_mask32 = gr.Image(
                     type="numpy",
                     label=f"Visualized {opts.latent_size} Inpaint Mask",
@@ -1603,9 +1518,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     interactive=False,
                     visible=True,
                 )
-                # gr.Markdown(
-                #     """<p style="text-align: center;">[NOTE] Above should be inpaint mask that you brushed, NOT the segmentation mask of the entire hand. </p>"""
-                # )
                 gr.Markdown(
                     """<p style="text-align: center;">⚠️ >3min and ~24GB per generation</p>"""
                 )
@@ -1645,7 +1557,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
         )
         fix_clear = gr.ClearButton()
 
-
+        gr.Examples(
+            fix_example_all,
+            inputs=[fix_crop, fix_ref, fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
+            examples_per_page=20,
+            postprocess=False,
+            elem_id="fix_examples_all",
+        )
+
         # listeners
         fix_crop.change(stash_original, fix_crop, fix_original)  # fix_original: (real_H, real_W, 3)
         fix_crop.change(stay_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref])
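This single `gr.Examples` block is the "fixhand examples autoload" of the commit message: one click on a row now fills all six inputs at once (the three per-step example tables are commented out above), while the CSS added earlier hides the non-image columns of this table. A minimal multi-input version, sketched with simplified, hypothetical components:

import gradio as gr

with gr.Blocks() as demo:
    img = gr.Image(type="filepath")
    cfg = gr.Number(label="cfg scale")
    seed = gr.Number(label="seed")
    # Clicking a row populates every listed input from the row's entries;
    # postprocess=False loads the stored values without the components'
    # usual output postprocessing.
    gr.Examples(
        [["bad_hands/1.jpg", 3.0, 1224]],
        inputs=[img, cfg, seed],
        postprocess=False,
    )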
@@ -1713,6 +1632,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
             reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
         )
         fix_kpts_path.change(read_kpts, fix_kpts_path, fix_kpts_np)
+        fix_inpaint_mask.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
+        fix_kpts_np.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
         fix_run.click(
             ready_sample,
             [fix_ref, fix_inpaint_mask, fix_kpts, fix_kpts_np],
@@ -1820,13 +1741,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
         gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 2</p>""")
         with gr.Row(variant="panel", elem_id="repose_tutorial"):
             with gr.Column():
-                gr.Video(
-                    "how_to_videos/subtitled_repose_hands.mp4",
-                    label="Tutorial",
-                    autoplay=True,
-                    loop=True,
-                    show_label=True,
-                )
+                # gr.Video(
+                #     "how_to_videos/subtitled_repose_hands.mp4",
+                #     label="Tutorial",
+                #     autoplay=True,
+                #     loop=True,
+                #     show_label=True,
+                # )
+                gr.HTML(tut2_example)
 
         # main tabs
         with gr.Row():