diff --git a/.gitignore b/.gitignore index e1958f72f28f9a0c64b98eae9a18346fc604c39e..8e994243ec63c9302abe0c5e8c660148385ee5ef 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ __pycache__/ *.py[cod] *$py.class - +.DS_Store # C extensions *.so diff --git a/README.md b/README.md index 6a7d28e5bc1eead243670220e6d55990f74e47ce..e1378d3b8761bdb8254d0f2c7df2983a4820cb97 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ colorFrom: purple colorTo: red sdk: gradio sdk_version: 4.44.1 -app_file: hg_app.py +app_file: gradio_app.py pinned: false short_description: Text-to-3D and Image-to-3D Generation models: diff --git a/assets/env_maps/gradient.jpg b/assets/env_maps/gradient.jpg new file mode 100644 index 0000000000000000000000000000000000000000..55546c1f260daa7d3c6eef36b70fe5d7e1697df0 Binary files /dev/null and b/assets/env_maps/gradient.jpg differ diff --git a/assets/env_maps/white.jpg b/assets/env_maps/white.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f7af1237608dd1d486edb9298c04fbe15ec74185 Binary files /dev/null and b/assets/env_maps/white.jpg differ diff --git a/assets/example_images/004.png b/assets/example_images/004.png old mode 100644 new mode 100755 diff --git a/assets/example_images/052.png b/assets/example_images/052.png old mode 100644 new mode 100755 diff --git a/assets/example_images/073.png b/assets/example_images/073.png old mode 100644 new mode 100755 diff --git a/assets/example_images/075.png b/assets/example_images/075.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1008.png b/assets/example_images/1008.png old mode 100644 new mode 100755 diff --git a/assets/example_images/101.png b/assets/example_images/101.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1022.png b/assets/example_images/1022.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1029.png b/assets/example_images/1029.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1037.png b/assets/example_images/1037.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1079.png b/assets/example_images/1079.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1111.png b/assets/example_images/1111.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1123.png b/assets/example_images/1123.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1128.png b/assets/example_images/1128.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1135.png b/assets/example_images/1135.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1146.png b/assets/example_images/1146.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1148.png b/assets/example_images/1148.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1154.png b/assets/example_images/1154.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1180.png b/assets/example_images/1180.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1196.png b/assets/example_images/1196.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1204.png b/assets/example_images/1204.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1234.png b/assets/example_images/1234.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1310.png b/assets/example_images/1310.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1316.png 
b/assets/example_images/1316.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1354.png b/assets/example_images/1354.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1429.png b/assets/example_images/1429.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1493.png b/assets/example_images/1493.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1582.png b/assets/example_images/1582.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1583.png b/assets/example_images/1583.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1596.png b/assets/example_images/1596.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1601.png b/assets/example_images/1601.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1603.png b/assets/example_images/1603.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1626.png b/assets/example_images/1626.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1627.png b/assets/example_images/1627.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1654.png b/assets/example_images/1654.png old mode 100644 new mode 100755 diff --git a/assets/example_images/167.png b/assets/example_images/167.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1670.png b/assets/example_images/1670.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1679.png b/assets/example_images/1679.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1687.png b/assets/example_images/1687.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1698.png b/assets/example_images/1698.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1715.png b/assets/example_images/1715.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1735.png b/assets/example_images/1735.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1738.png b/assets/example_images/1738.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1744.png b/assets/example_images/1744.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1758.png b/assets/example_images/1758.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1772.png b/assets/example_images/1772.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1773.png b/assets/example_images/1773.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1778.png b/assets/example_images/1778.png old mode 100644 new mode 100755 diff --git a/assets/example_images/179.png b/assets/example_images/179.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1898.png b/assets/example_images/1898.png old mode 100644 new mode 100755 diff --git a/assets/example_images/191.png b/assets/example_images/191.png old mode 100644 new mode 100755 diff --git a/assets/example_images/195.png b/assets/example_images/195.png old mode 100644 new mode 100755 diff --git a/assets/example_images/197.png b/assets/example_images/197.png old mode 100644 new mode 100755 diff --git a/assets/example_images/198.png b/assets/example_images/198.png old mode 100644 new mode 100755 diff --git a/assets/example_images/202.png b/assets/example_images/202.png old mode 100644 new mode 100755 diff --git a/assets/example_images/203.png b/assets/example_images/203.png old mode 100644 new mode 100755 diff --git 
a/assets/example_images/218.png b/assets/example_images/218.png old mode 100644 new mode 100755 diff --git a/assets/example_images/219.png b/assets/example_images/219.png old mode 100644 new mode 100755 diff --git a/assets/example_images/379.png b/assets/example_images/379.png old mode 100644 new mode 100755 diff --git a/assets/example_images/380.png b/assets/example_images/380.png old mode 100644 new mode 100755 diff --git a/assets/example_images/419.png b/assets/example_images/419.png old mode 100644 new mode 100755 diff --git a/assets/example_images/583.png b/assets/example_images/583.png old mode 100644 new mode 100755 diff --git a/assets/example_images/888.png b/assets/example_images/888.png old mode 100644 new mode 100755 diff --git a/assets/example_images/895.png b/assets/example_images/895.png old mode 100644 new mode 100755 diff --git a/assets/example_images/example_000.png b/assets/example_images/example_000.png old mode 100644 new mode 100755 diff --git a/assets/example_images/example_002.png b/assets/example_images/example_002.png old mode 100644 new mode 100755 diff --git a/assets/example_mv_images/1/back.png b/assets/example_mv_images/1/back.png new file mode 100644 index 0000000000000000000000000000000000000000..b4e05098bbb96c51c6a4413957d91d8385670bd3 Binary files /dev/null and b/assets/example_mv_images/1/back.png differ diff --git a/assets/example_mv_images/1/front.png b/assets/example_mv_images/1/front.png new file mode 100644 index 0000000000000000000000000000000000000000..1417f8cf567224b8c56dc24da659184b3fb89bc9 Binary files /dev/null and b/assets/example_mv_images/1/front.png differ diff --git a/assets/example_mv_images/1/left.png b/assets/example_mv_images/1/left.png new file mode 100644 index 0000000000000000000000000000000000000000..ba76d3154b97be521d24d43a544563476a7b947c Binary files /dev/null and b/assets/example_mv_images/1/left.png differ diff --git a/assets/example_mv_images/10/back.png b/assets/example_mv_images/10/back.png new file mode 100644 index 0000000000000000000000000000000000000000..eef6ab06e7f3607a816717c58cad5e2bf9e2492a Binary files /dev/null and b/assets/example_mv_images/10/back.png differ diff --git a/assets/example_mv_images/10/front.png b/assets/example_mv_images/10/front.png new file mode 100644 index 0000000000000000000000000000000000000000..dda89b7a81936ffbc044d14d92086a73aaa7d80b Binary files /dev/null and b/assets/example_mv_images/10/front.png differ diff --git a/assets/example_mv_images/10/left.png b/assets/example_mv_images/10/left.png new file mode 100644 index 0000000000000000000000000000000000000000..e0579df655cbde20373e07ff9a219d58cd59910f Binary files /dev/null and b/assets/example_mv_images/10/left.png differ diff --git a/assets/example_mv_images/11/back.png b/assets/example_mv_images/11/back.png new file mode 100644 index 0000000000000000000000000000000000000000..b586caf6a15a34f66d649207e761b388c68cde77 Binary files /dev/null and b/assets/example_mv_images/11/back.png differ diff --git a/assets/example_mv_images/11/front.png b/assets/example_mv_images/11/front.png new file mode 100644 index 0000000000000000000000000000000000000000..595f9d6efa05e62469411286f9e126cc8c378f59 Binary files /dev/null and b/assets/example_mv_images/11/front.png differ diff --git a/assets/example_mv_images/11/left.png b/assets/example_mv_images/11/left.png new file mode 100644 index 0000000000000000000000000000000000000000..e83eccfdfe06c8667cf93a81dec0e7368d915336 Binary files /dev/null and b/assets/example_mv_images/11/left.png differ diff --git 
a/assets/example_mv_images/12/back.png b/assets/example_mv_images/12/back.png new file mode 100644 index 0000000000000000000000000000000000000000..c49e0fc43471bd5d2f070044958226b712c53258 Binary files /dev/null and b/assets/example_mv_images/12/back.png differ diff --git a/assets/example_mv_images/12/front.png b/assets/example_mv_images/12/front.png new file mode 100644 index 0000000000000000000000000000000000000000..148cd51aa47b0cf81fe30a2be09e581d78803a67 Binary files /dev/null and b/assets/example_mv_images/12/front.png differ diff --git a/assets/example_mv_images/12/left.png b/assets/example_mv_images/12/left.png new file mode 100644 index 0000000000000000000000000000000000000000..4fbdb357503ef9140679beb977dab84b6fbae925 Binary files /dev/null and b/assets/example_mv_images/12/left.png differ diff --git a/assets/example_mv_images/13/back.png b/assets/example_mv_images/13/back.png new file mode 100644 index 0000000000000000000000000000000000000000..26685f594bdac0907d4897c59bc51c299c354abc Binary files /dev/null and b/assets/example_mv_images/13/back.png differ diff --git a/assets/example_mv_images/13/front.png b/assets/example_mv_images/13/front.png new file mode 100644 index 0000000000000000000000000000000000000000..95053ac3ebf258b5ec9840ba1efc64d8eb853621 Binary files /dev/null and b/assets/example_mv_images/13/front.png differ diff --git a/assets/example_mv_images/13/left.png b/assets/example_mv_images/13/left.png new file mode 100644 index 0000000000000000000000000000000000000000..34fe6634efaa68963f54f1c96c0ef3a9ce324cff Binary files /dev/null and b/assets/example_mv_images/13/left.png differ diff --git a/assets/example_mv_images/14/back.png b/assets/example_mv_images/14/back.png new file mode 100644 index 0000000000000000000000000000000000000000..1a48313bb0d002c8b4328f446c85ab98d119306a Binary files /dev/null and b/assets/example_mv_images/14/back.png differ diff --git a/assets/example_mv_images/14/front.png b/assets/example_mv_images/14/front.png new file mode 100644 index 0000000000000000000000000000000000000000..3b58dfa99c4c574df6e2dc711db29cddae3ecfb1 Binary files /dev/null and b/assets/example_mv_images/14/front.png differ diff --git a/assets/example_mv_images/14/left.png b/assets/example_mv_images/14/left.png new file mode 100644 index 0000000000000000000000000000000000000000..9842b3afce8da368e765a23b3c331d5470651d34 Binary files /dev/null and b/assets/example_mv_images/14/left.png differ diff --git a/assets/example_mv_images/2/back.png b/assets/example_mv_images/2/back.png new file mode 100644 index 0000000000000000000000000000000000000000..88a05130fd9e66a8d47aba17daab335349fb71ec Binary files /dev/null and b/assets/example_mv_images/2/back.png differ diff --git a/assets/example_mv_images/2/front.png b/assets/example_mv_images/2/front.png new file mode 100644 index 0000000000000000000000000000000000000000..35c55ba662a6b7d56ece98db4e5d77ff0ef696f6 Binary files /dev/null and b/assets/example_mv_images/2/front.png differ diff --git a/assets/example_mv_images/2/left.png b/assets/example_mv_images/2/left.png new file mode 100644 index 0000000000000000000000000000000000000000..bd47d6666218a69b8ce03510f7898a07665cbbf8 Binary files /dev/null and b/assets/example_mv_images/2/left.png differ diff --git a/assets/example_mv_images/3/back.png b/assets/example_mv_images/3/back.png new file mode 100644 index 0000000000000000000000000000000000000000..98185fe597c945acadc91c96b0d7a4da84f1eaee Binary files /dev/null and b/assets/example_mv_images/3/back.png differ diff --git 
a/assets/example_mv_images/3/front.png b/assets/example_mv_images/3/front.png new file mode 100644 index 0000000000000000000000000000000000000000..1265af6287dc35c2932a5b32d7a88b9bfda89bf6 Binary files /dev/null and b/assets/example_mv_images/3/front.png differ diff --git a/assets/example_mv_images/3/left.png b/assets/example_mv_images/3/left.png new file mode 100644 index 0000000000000000000000000000000000000000..df83c19e2c61b09baaa0fdeb02c187d1292c1eef Binary files /dev/null and b/assets/example_mv_images/3/left.png differ diff --git a/assets/example_mv_images/4/back.png b/assets/example_mv_images/4/back.png new file mode 100644 index 0000000000000000000000000000000000000000..c818617090ab64610b93f3f5d0ee932f7b42be52 Binary files /dev/null and b/assets/example_mv_images/4/back.png differ diff --git a/assets/example_mv_images/4/front.png b/assets/example_mv_images/4/front.png new file mode 100644 index 0000000000000000000000000000000000000000..8758fd6e8993335f7ca8989c6016fa2a5d320389 Binary files /dev/null and b/assets/example_mv_images/4/front.png differ diff --git a/assets/example_mv_images/4/left.png b/assets/example_mv_images/4/left.png new file mode 100644 index 0000000000000000000000000000000000000000..584be7f0d23090c6fcf24637d33dea73c7504b20 Binary files /dev/null and b/assets/example_mv_images/4/left.png differ diff --git a/assets/example_mv_images/5/back.png b/assets/example_mv_images/5/back.png new file mode 100644 index 0000000000000000000000000000000000000000..71e53e10727609bb7c10f1af5534c8f0b2d9f672 Binary files /dev/null and b/assets/example_mv_images/5/back.png differ diff --git a/assets/example_mv_images/5/front.png b/assets/example_mv_images/5/front.png new file mode 100644 index 0000000000000000000000000000000000000000..041f4ac1aa2398d6e2645582d15ffe9d8351a845 Binary files /dev/null and b/assets/example_mv_images/5/front.png differ diff --git a/assets/example_mv_images/5/left.png b/assets/example_mv_images/5/left.png new file mode 100644 index 0000000000000000000000000000000000000000..2337b260261634d7d6deab2e4b787decc2461758 Binary files /dev/null and b/assets/example_mv_images/5/left.png differ diff --git a/assets/example_mv_images/6/back.png b/assets/example_mv_images/6/back.png new file mode 100644 index 0000000000000000000000000000000000000000..6ceb5d8ab294d234c2f4a7861e012653c57e4a8d Binary files /dev/null and b/assets/example_mv_images/6/back.png differ diff --git a/assets/example_mv_images/6/front.png b/assets/example_mv_images/6/front.png new file mode 100644 index 0000000000000000000000000000000000000000..95fc2c0187cadd47b18a8b005e9941ed4c5dee0e Binary files /dev/null and b/assets/example_mv_images/6/front.png differ diff --git a/assets/example_mv_images/6/left.png b/assets/example_mv_images/6/left.png new file mode 100644 index 0000000000000000000000000000000000000000..944a731239630512747b5bcef82091bd04336b69 Binary files /dev/null and b/assets/example_mv_images/6/left.png differ diff --git a/assets/example_mv_images/7/back.png b/assets/example_mv_images/7/back.png new file mode 100644 index 0000000000000000000000000000000000000000..5ef772d7a9f4d54844c4ef693b034eab5376fdf1 Binary files /dev/null and b/assets/example_mv_images/7/back.png differ diff --git a/assets/example_mv_images/7/front.png b/assets/example_mv_images/7/front.png new file mode 100644 index 0000000000000000000000000000000000000000..01b20d8e5fc9d5d79b85d663313c3e39f524eb68 Binary files /dev/null and b/assets/example_mv_images/7/front.png differ diff --git a/assets/example_mv_images/7/left.png 
b/assets/example_mv_images/7/left.png new file mode 100644 index 0000000000000000000000000000000000000000..bfa778a0bc7de8356d1223e1f22409f5ea6cd2ca Binary files /dev/null and b/assets/example_mv_images/7/left.png differ diff --git a/assets/example_mv_images/8/back.png b/assets/example_mv_images/8/back.png new file mode 100644 index 0000000000000000000000000000000000000000..d1d6b9d0bfba93d838d41965faf5307e97926501 Binary files /dev/null and b/assets/example_mv_images/8/back.png differ diff --git a/assets/example_mv_images/8/front.png b/assets/example_mv_images/8/front.png new file mode 100644 index 0000000000000000000000000000000000000000..9e3c6d8d56b881e110a3a8fadb6de667468622d1 Binary files /dev/null and b/assets/example_mv_images/8/front.png differ diff --git a/assets/example_mv_images/8/left.png b/assets/example_mv_images/8/left.png new file mode 100644 index 0000000000000000000000000000000000000000..2aeb68a0f237da6b9e456f62ad35a38b06eb7603 Binary files /dev/null and b/assets/example_mv_images/8/left.png differ diff --git a/assets/modelviewer-template.html b/assets/modelviewer-template.html index 5a81985e2ccc115efdb2848523da731945bb0957..0c75c6be82a0f85c5bb54a4402f1402a20f5a51e 100644 --- a/assets/modelviewer-template.html +++ b/assets/modelviewer-template.html @@ -3,22 +3,41 @@ - - + @@ -43,8 +62,20 @@
- +
+ + +
- + \ No newline at end of file diff --git a/assets/modelviewer-textured-template.html b/assets/modelviewer-textured-template.html index c873e01dcc5d69f6a65f875362403c60c4874617..ae6558184d9c27273e135a7e2f240415e5f2b953 100644 --- a/assets/modelviewer-textured-template.html +++ b/assets/modelviewer-textured-template.html @@ -3,8 +3,7 @@ - - +
- +
+
+ + +
+ +
+
+ Appearance +
+
Geometry
+
+
- - + \ No newline at end of file diff --git a/gradio_app.py b/gradio_app.py old mode 100644 new mode 100755 index 2ec55ded69db8b8659b548ff6e037b03d2745d4f..526569c5eed3a9a88cebe8a78808cd99636baba4 --- a/gradio_app.py +++ b/gradio_app.py @@ -1,4 +1,19 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + import os +import random import shutil import time from glob import glob @@ -6,47 +21,98 @@ from pathlib import Path import gradio as gr import torch +import trimesh import uvicorn from fastapi import FastAPI from fastapi.staticfiles import StaticFiles +import uuid + +from hy3dgen.shapegen.utils import logger + +MAX_SEED = 1e7 + +if True: + import os + import spaces + import subprocess + import sys + import shlex + print("cd /home/user/app/hy3dgen/texgen/differentiable_renderer/ && bash compile_mesh_painter.sh") + os.system("cd /home/user/app/hy3dgen/texgen/differentiable_renderer/ && bash compile_mesh_painter.sh") + print('install custom') + subprocess.run(shlex.split("pip install custom_rasterizer-0.1-cp310-cp310-linux_x86_64.whl"), check=True) def get_example_img_list(): print('Loading example img list ...') - return sorted(glob('./assets/example_images/*.png')) + return sorted(glob('./assets/example_images/**/*.png', recursive=True)) def get_example_txt_list(): print('Loading example txt list ...') txt_list = list() - for line in open('./assets/example_prompts.txt'): + for line in open('./assets/example_prompts.txt', encoding='utf-8'): txt_list.append(line.strip()) return txt_list -def gen_save_folder(max_size=60): +def get_example_mv_list(): + print('Loading example mv list ...') + mv_list = list() + root = './assets/example_mv_images' + for mv_dir in os.listdir(root): + view_list = [] + for view in ['front', 'back', 'left', 'right']: + path = os.path.join(root, mv_dir, f'{view}.png') + if os.path.exists(path): + view_list.append(path) + else: + view_list.append(None) + mv_list.append(view_list) + return mv_list + + +def gen_save_folder(max_size=200): os.makedirs(SAVE_DIR, exist_ok=True) - exists = set(int(_) for _ in os.listdir(SAVE_DIR) if not _.startswith(".")) - cur_id = min(set(range(max_size)) - exists) if len(exists) < max_size else -1 - if os.path.exists(f"{SAVE_DIR}/{(cur_id + 1) % max_size}"): - shutil.rmtree(f"{SAVE_DIR}/{(cur_id + 1) % max_size}") - print(f"remove {SAVE_DIR}/{(cur_id + 1) % max_size} success !!!") - save_folder = f"{SAVE_DIR}/{max(0, cur_id)}" - os.makedirs(save_folder, exist_ok=True) - print(f"mkdir {save_folder} suceess !!!") - return save_folder - - -def export_mesh(mesh, save_folder, textured=False): + + # Collect all existing folder paths + dirs = [f for f in Path(SAVE_DIR).iterdir() if f.is_dir()] + + # If the number of folders exceeds max_size, remove the one with the oldest creation time
+ if len(dirs) >= max_size: + # Sort by creation time; the oldest comes first + oldest_dir = min(dirs, key=lambda x: x.stat().st_ctime) + shutil.rmtree(oldest_dir) + print(f"Removed the oldest folder: {oldest_dir}") + + # Generate a new folder named with a uuid + new_folder = os.path.join(SAVE_DIR, str(uuid.uuid4())) + os.makedirs(new_folder, exist_ok=True) + print(f"Created new folder: {new_folder}") + + return new_folder + + +def export_mesh(mesh, save_folder, textured=False, type='glb'): if textured: - path = os.path.join(save_folder, f'textured_mesh.glb') + path = os.path.join(save_folder, f'textured_mesh.{type}') else: - path = os.path.join(save_folder, f'white_mesh.glb') - mesh.export(path, include_normals=textured) + path = os.path.join(save_folder, f'white_mesh.{type}') + if type not in ['glb', 'obj']: + mesh.export(path) + else: + mesh.export(path, include_normals=textured) return path +def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: + if randomize_seed: + seed = random.randint(0, MAX_SEED) + return seed + + def build_model_viewer_html(save_folder, height=660, width=790, textured=False): + # Remove first folder from path to make relative path if textured: related_path = f"./textured_mesh.glb" template_name = './assets/modelviewer-textured-template.html' output_html_path = os.path.join(save_folder, f'textured_mesh.html') @@ -55,25 +121,20 @@ def build_model_viewer_html(save_folder, height=660, width=790, textured=False): related_path = f"./white_mesh.glb" template_name = './assets/modelviewer-template.html' output_html_path = os.path.join(save_folder, f'white_mesh.html') - - with open(os.path.join(CURRENT_DIR, template_name), 'r') as f: + offset = 50 if textured else 10 + with open(os.path.join(CURRENT_DIR, template_name), 'r', encoding='utf-8') as f: template_html = f.read() - obj_html = f""" -
- - -
- """ - with open(output_html_path, 'w') as f: - f.write(template_html.replace('', obj_html)) + with open(output_html_path, 'w', encoding='utf-8') as f: + template_html = template_html.replace('#height#', f'{height - offset}') + template_html = template_html.replace('#width#', f'{width}') + template_html = template_html.replace('#src#', f'{related_path}/') + f.write(template_html) - output_html_path = output_html_path.replace(SAVE_DIR + '/', '') - iframe_tag = f'' - print(f'Find html {output_html_path}, {os.path.exists(output_html_path)}') + rel_path = os.path.relpath(output_html_path, SAVE_DIR) + iframe_tag = f'' + print( + f'Find html file {output_html_path}, {os.path.exists(output_html_path)}, relative HTML path is /static/{rel_path}') return f"""
@@ -81,21 +142,58 @@ def build_model_viewer_html(save_folder, height=660, width=790, textured=False):
""" - +@spaces.GPU(duration=40) def _gen_shape( - caption, - image, + caption=None, + image=None, + mv_image_front=None, + mv_image_back=None, + mv_image_left=None, + mv_image_right=None, steps=50, guidance_scale=7.5, seed=1234, octree_resolution=256, check_box_rembg=False, + num_chunks=200000, + randomize_seed: bool = False, ): + if not MV_MODE and image is None and caption is None: + raise gr.Error("Please provide either a caption or an image.") + if MV_MODE: + if mv_image_front is None and mv_image_back is None and mv_image_left is None and mv_image_right is None: + raise gr.Error("Please provide at least one view image.") + image = {} + if mv_image_front: + image['front'] = mv_image_front + if mv_image_back: + image['back'] = mv_image_back + if mv_image_left: + image['left'] = mv_image_left + if mv_image_right: + image['right'] = mv_image_right + + seed = int(randomize_seed_fn(seed, randomize_seed)) + + octree_resolution = int(octree_resolution) if caption: print('prompt is', caption) save_folder = gen_save_folder() - stats = {} + stats = { + 'model': { + 'shapegen': f'{args.model_path}/{args.subfolder}', + 'texgen': f'{args.texgen_model_path}', + }, + 'params': { + 'caption': caption, + 'steps': steps, + 'guidance_scale': guidance_scale, + 'seed': seed, + 'octree_resolution': octree_resolution, + 'check_box_rembg': check_box_rembg, + 'num_chunks': num_chunks, + } + } time_meta = {} - start_time_0 = time.time() if image is None: start_time = time.time() @@ -105,204 +203,361 @@ def _gen_shape( raise gr.Error(f"Text to 3D is disable. Please enable it by `python gradio_app.py --enable_t23d`.") time_meta['text2image'] = time.time() - start_time - image.save(os.path.join(save_folder, 'input.png')) - - print(image.mode) - if check_box_rembg or image.mode == "RGB": + # remove disk io to make responding faster, uncomment at your will. + # image.save(os.path.join(save_folder, 'input.png')) + if MV_MODE: start_time = time.time() - image = rmbg_worker(image.convert('RGB')) - time_meta['rembg'] = time.time() - start_time + for k, v in image.items(): + if check_box_rembg or v.mode == "RGB": + img = rmbg_worker(v.convert('RGB')) + image[k] = img + time_meta['remove background'] = time.time() - start_time + else: + if check_box_rembg or image.mode == "RGB": + start_time = time.time() + image = rmbg_worker(image.convert('RGB')) + time_meta['remove background'] = time.time() - start_time - image.save(os.path.join(save_folder, 'rembg.png')) + # remove disk io to make responding faster, uncomment at your will. 
+ # image.save(os.path.join(save_folder, 'rembg.png')) # image to white model start_time = time.time() generator = torch.Generator() generator = generator.manual_seed(int(seed)) - mesh = i23d_worker( + outputs = i23d_worker( image=image, num_inference_steps=steps, guidance_scale=guidance_scale, generator=generator, - octree_resolution=octree_resolution - )[0] + octree_resolution=octree_resolution, + num_chunks=num_chunks, + output_type='mesh' + ) + time_meta['shape generation'] = time.time() - start_time + logger.info("---Shape generation takes %s seconds ---" % (time.time() - start_time)) - mesh = FloaterRemover()(mesh) - mesh = DegenerateFaceRemover()(mesh) - mesh = FaceReducer()(mesh) + tmp_start = time.time() + mesh = export_to_trimesh(outputs)[0] + time_meta['export to trimesh'] = time.time() - tmp_start stats['number_of_faces'] = mesh.faces.shape[0] stats['number_of_vertices'] = mesh.vertices.shape[0] - time_meta['image_to_textured_3d'] = {'total': time.time() - start_time} - time_meta['total'] = time.time() - start_time_0 stats['time'] = time_meta - return mesh, image, save_folder - + main_image = image if not MV_MODE else image['front'] + return mesh, main_image, save_folder, stats, seed +@spaces.GPU(duration=90) def generation_all( - caption, - image, + caption=None, + image=None, + mv_image_front=None, + mv_image_back=None, + mv_image_left=None, + mv_image_right=None, steps=50, guidance_scale=7.5, seed=1234, octree_resolution=256, - check_box_rembg=False + check_box_rembg=False, + num_chunks=200000, + randomize_seed: bool = False, ): - mesh, image, save_folder = _gen_shape( + start_time_0 = time.time() + mesh, image, save_folder, stats, seed = _gen_shape( caption, image, + mv_image_front=mv_image_front, + mv_image_back=mv_image_back, + mv_image_left=mv_image_left, + mv_image_right=mv_image_right, steps=steps, guidance_scale=guidance_scale, seed=seed, octree_resolution=octree_resolution, - check_box_rembg=check_box_rembg + check_box_rembg=check_box_rembg, + num_chunks=num_chunks, + randomize_seed=randomize_seed, ) path = export_mesh(mesh, save_folder, textured=False) - model_viewer_html = build_model_viewer_html(save_folder, height=596, width=700) + # tmp_time = time.time() + # mesh = floater_remove_worker(mesh) + # mesh = degenerate_face_remove_worker(mesh) + # logger.info("---Postprocessing takes %s seconds ---" % (time.time() - tmp_time)) + # stats['time']['postprocessing'] = time.time() - tmp_time + + tmp_time = time.time() + mesh = face_reduce_worker(mesh) + logger.info("---Face Reduction takes %s seconds ---" % (time.time() - tmp_time)) + stats['time']['face reduction'] = time.time() - tmp_time + + tmp_time = time.time() textured_mesh = texgen_worker(mesh, image) - path_textured = export_mesh(textured_mesh, save_folder, textured=True) - model_viewer_html_textured = build_model_viewer_html(save_folder, height=596, width=700, textured=True) + logger.info("---Texture Generation takes %s seconds ---" % (time.time() - tmp_time)) + stats['time']['texture generation'] = time.time() - tmp_time + stats['time']['total'] = time.time() - start_time_0 + textured_mesh.metadata['extras'] = stats + path_textured = export_mesh(textured_mesh, save_folder, textured=True) + model_viewer_html_textured = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH, + textured=True) + if args.low_vram_mode: + torch.cuda.empty_cache() return ( - gr.update(value=path, visible=True), - gr.update(value=path_textured, visible=True), - model_viewer_html, + gr.update(value=path), + 
gr.update(value=path_textured), model_viewer_html_textured, + stats, + seed, ) - +@spaces.GPU(duration=40) def shape_generation( - caption, - image, + caption=None, + image=None, + mv_image_front=None, + mv_image_back=None, + mv_image_left=None, + mv_image_right=None, steps=50, guidance_scale=7.5, seed=1234, octree_resolution=256, check_box_rembg=False, + num_chunks=200000, + randomize_seed: bool = False, ): - mesh, image, save_folder = _gen_shape( + start_time_0 = time.time() + mesh, image, save_folder, stats, seed = _gen_shape( caption, image, + mv_image_front=mv_image_front, + mv_image_back=mv_image_back, + mv_image_left=mv_image_left, + mv_image_right=mv_image_right, steps=steps, guidance_scale=guidance_scale, seed=seed, octree_resolution=octree_resolution, - check_box_rembg=check_box_rembg + check_box_rembg=check_box_rembg, + num_chunks=num_chunks, + randomize_seed=randomize_seed, ) + stats['time']['total'] = time.time() - start_time_0 + mesh.metadata['extras'] = stats path = export_mesh(mesh, save_folder, textured=False) - model_viewer_html = build_model_viewer_html(save_folder, height=596, width=700) - + model_viewer_html = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH) + if args.low_vram_mode: + torch.cuda.empty_cache() return ( - gr.update(value=path, visible=True), + gr.update(value=path), model_viewer_html, + stats, + seed, ) def build_app(): - title_html = """ + title = 'Hunyuan3D-2: High Resolution Textured 3D Assets Generation' + if MV_MODE: + title = 'Hunyuan3D-2mv: Image to 3D Generation with 1-4 Views' + if 'mini' in args.subfolder: + title = 'Hunyuan3D-2mini: Strong 0.6B Image to Shape Generator' + if TURBO_MODE: + title = title.replace(':', '-Turbo: Fast ') + + title_html = f"""
- Hunyuan3D-2: Scaling Diffusion Models for High Resolution Textured 3D Assets Generation + {title}
Tencent Hunyuan3D Team
- Github Page   + Github   Homepage   + Hunyuan3D Studio   Technical Report   - Models   + Pretrained Models
+ """ + custom_css = """ + .app.svelte-wpkpf6.svelte-wpkpf6:not(.fill_width) { + max-width: 1480px; + } + .mv-image button .wrap { + font-size: 10px; + } + + .mv-image .icon-wrap { + width: 20px; + } + """ - with gr.Blocks(theme=gr.themes.Base(), title='Hunyuan-3D-2.0') as demo: + with gr.Blocks(theme=gr.themes.Base(), title='Hunyuan-3D-2.0', analytics_enabled=False, css=custom_css) as demo: gr.HTML(title_html) with gr.Row(): - with gr.Column(scale=2): - with gr.Tabs() as tabs_prompt: - with gr.Tab('Image Prompt', id='tab_img_prompt') as tab_ip: + with gr.Column(scale=3): + with gr.Tabs(selected='tab_img_prompt') as tabs_prompt: + with gr.Tab('Image Prompt', id='tab_img_prompt', visible=not MV_MODE) as tab_ip: image = gr.Image(label='Image', type='pil', image_mode='RGBA', height=290) - with gr.Row(): - check_box_rembg = gr.Checkbox(value=True, label='Remove Background') - with gr.Tab('Text Prompt', id='tab_txt_prompt', visible=HAS_T2I) as tab_tp: + with gr.Tab('Text Prompt', id='tab_txt_prompt', visible=HAS_T2I and not MV_MODE) as tab_tp: caption = gr.Textbox(label='Text Prompt', placeholder='HunyuanDiT will be used to generate image.', info='Example: A 3D model of a cute cat, white background') - - with gr.Accordion('Advanced Options', open=False): - num_steps = gr.Slider(maximum=50, minimum=20, value=30, step=1, label='Inference Steps') - octree_resolution = gr.Dropdown([256, 384, 512], value=256, label='Octree Resolution') - cfg_scale = gr.Number(value=5.5, label='Guidance Scale') - seed = gr.Slider(maximum=1e7, minimum=0, value=1234, label='Seed') - - with gr.Group(): - btn = gr.Button(value='Generate Shape Only', variant='primary') - btn_all = gr.Button(value='Generate Shape and Texture', variant='primary', visible=HAS_TEXTUREGEN) + with gr.Tab('MultiView Prompt', visible=MV_MODE) as tab_mv: + # gr.Label('Please upload at least one front image.') + with gr.Row(): + mv_image_front = gr.Image(label='Front', type='pil', image_mode='RGBA', height=140, + min_width=100, elem_classes='mv-image') + mv_image_back = gr.Image(label='Back', type='pil', image_mode='RGBA', height=140, + min_width=100, elem_classes='mv-image') + with gr.Row(): + mv_image_left = gr.Image(label='Left', type='pil', image_mode='RGBA', height=140, + min_width=100, elem_classes='mv-image') + mv_image_right = gr.Image(label='Right', type='pil', image_mode='RGBA', height=140, + min_width=100, elem_classes='mv-image') + + with gr.Row(): + btn = gr.Button(value='Gen Shape', variant='primary', min_width=100) + btn_all = gr.Button(value='Gen Textured Shape', + variant='primary', + visible=HAS_TEXTUREGEN, + min_width=100) with gr.Group(): file_out = gr.File(label="File", visible=False) file_out2 = gr.File(label="File", visible=False) - with gr.Column(scale=5): - with gr.Tabs(): - with gr.Tab('Generated Mesh') as mesh1: - html_output1 = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output') - with gr.Tab('Generated Textured Mesh') as mesh2: - html_output2 = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output') - - with gr.Column(scale=2): - with gr.Tabs() as gallery: - with gr.Tab('Image to 3D Gallery', id='tab_img_gallery') as tab_gi: + with gr.Tabs(selected='tab_options' if TURBO_MODE else 'tab_export'): + with gr.Tab("Options", id='tab_options', visible=TURBO_MODE): + gen_mode = gr.Radio(label='Generation Mode', + info='Recommendation: Turbo for most cases, Fast for very complex cases, Standard seldom use.', + choices=['Turbo', 'Fast', 'Standard'], value='Turbo') + decode_mode = gr.Radio(label='Decoding Mode', + info='The resolution for 
exporting mesh from generated vectset', + choices=['Low', 'Standard', 'High'], + value='Standard') + with gr.Tab('Advanced Options', id='tab_advanced_options'): + with gr.Row(): + check_box_rembg = gr.Checkbox(value=True, label='Remove Background', min_width=100) + randomize_seed = gr.Checkbox(label="Randomize seed", value=True, min_width=100) + seed = gr.Slider( + label="Seed", + minimum=0, + maximum=MAX_SEED, + step=1, + value=1234, + min_width=100, + ) + with gr.Row(): + num_steps = gr.Slider(maximum=100, + minimum=1, + value=5 if 'turbo' in args.subfolder else 30, + step=1, label='Inference Steps') + octree_resolution = gr.Slider(maximum=512, minimum=16, value=256, label='Octree Resolution') + with gr.Row(): + cfg_scale = gr.Number(value=5.0, label='Guidance Scale', min_width=100) + num_chunks = gr.Slider(maximum=5000000, minimum=1000, value=8000, + label='Number of Chunks', min_width=100) + with gr.Tab("Export", id='tab_export'): + with gr.Row(): + file_type = gr.Dropdown(label='File Type', choices=SUPPORTED_FORMATS, + value='glb', min_width=100) + reduce_face = gr.Checkbox(label='Simplify Mesh', value=False, min_width=100) + export_texture = gr.Checkbox(label='Include Texture', value=False, + visible=False, min_width=100) + target_face_num = gr.Slider(maximum=1000000, minimum=100, value=10000, + label='Target Face Number') + with gr.Row(): + confirm_export = gr.Button(value="Transform", min_width=100) + file_export = gr.DownloadButton(label="Download", variant='primary', + interactive=False, min_width=100) + + with gr.Column(scale=6): + with gr.Tabs(selected='gen_mesh_panel') as tabs_output: + with gr.Tab('Generated Mesh', id='gen_mesh_panel'): + html_gen_mesh = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output') + with gr.Tab('Exporting Mesh', id='export_mesh_panel'): + html_export_mesh = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output') + with gr.Tab('Mesh Statistic', id='stats_panel'): + stats = gr.Json({}, label='Mesh Stats') + + with gr.Column(scale=3 if MV_MODE else 2): + with gr.Tabs(selected='tab_img_gallery') as gallery: + with gr.Tab('Image to 3D Gallery', id='tab_img_gallery', visible=not MV_MODE) as tab_gi: with gr.Row(): gr.Examples(examples=example_is, inputs=[image], - label="Image Prompts", examples_per_page=18) + label=None, examples_per_page=18) - with gr.Tab('Text to 3D Gallery', id='tab_txt_gallery', visible=HAS_T2I) as tab_gt: + with gr.Tab('Text to 3D Gallery', id='tab_txt_gallery', visible=HAS_T2I and not MV_MODE) as tab_gt: with gr.Row(): gr.Examples(examples=example_ts, inputs=[caption], - label="Text Prompts", examples_per_page=18) + label=None, examples_per_page=18) + with gr.Tab('MultiView to 3D Gallery', id='tab_mv_gallery', visible=MV_MODE) as tab_mv: + with gr.Row(): + gr.Examples(examples=example_mvs, + inputs=[mv_image_front, mv_image_back, mv_image_left, mv_image_right], + label=None, examples_per_page=6) + gr.HTML(f""" +
+ Activated Model - Shape Generation ({args.model_path}/{args.subfolder}) ; Texture Generation ({'Hunyuan3D-2' if HAS_TEXTUREGEN else 'Unavailable'}) +
+ """) if not HAS_TEXTUREGEN: - gr.HTML(""") -
+ gr.HTML(""" +
Warning: Texture synthesis is disabled due to missing requirements, - please install requirements following README.md to activate it. + please install requirements following README.md to activate it.
""") if not args.enable_t23d: gr.HTML(""" -
+
Warning: Text to 3D is disabled. To activate it, please run `python gradio_app.py --enable_t23d`.
""") - tab_gi.select(fn=lambda: gr.update(selected='tab_img_prompt'), outputs=tabs_prompt) + tab_ip.select(fn=lambda: gr.update(selected='tab_img_gallery'), outputs=gallery) if HAS_T2I: - tab_gt.select(fn=lambda: gr.update(selected='tab_txt_prompt'), outputs=tabs_prompt) + tab_tp.select(fn=lambda: gr.update(selected='tab_txt_gallery'), outputs=gallery) btn.click( shape_generation, inputs=[ caption, image, + mv_image_front, + mv_image_back, + mv_image_left, + mv_image_right, num_steps, cfg_scale, seed, octree_resolution, check_box_rembg, + num_chunks, + randomize_seed, ], - outputs=[file_out, html_output1] + outputs=[file_out, html_gen_mesh, stats, seed] ).then( - lambda: gr.update(visible=True), - outputs=[file_out], + lambda: (gr.update(visible=False, value=False), gr.update(interactive=True), gr.update(interactive=True), + gr.update(interactive=False)), + outputs=[export_texture, reduce_face, confirm_export, file_export], + ).then( + lambda: gr.update(selected='gen_mesh_panel'), + outputs=[tabs_output], ) btn_all.click( @@ -310,16 +565,88 @@ def build_app(): inputs=[ caption, image, + mv_image_front, + mv_image_back, + mv_image_left, + mv_image_right, num_steps, cfg_scale, seed, octree_resolution, check_box_rembg, + num_chunks, + randomize_seed, ], - outputs=[file_out, file_out2, html_output1, html_output2] + outputs=[file_out, file_out2, html_gen_mesh, stats, seed] + ).then( + lambda: (gr.update(visible=True, value=True), gr.update(interactive=False), gr.update(interactive=True), + gr.update(interactive=False)), + outputs=[export_texture, reduce_face, confirm_export, file_export], ).then( - lambda: (gr.update(visible=True), gr.update(visible=True)), - outputs=[file_out, file_out2], + lambda: gr.update(selected='gen_mesh_panel'), + outputs=[tabs_output], + ) + + def on_gen_mode_change(value): + if value == 'Turbo': + return gr.update(value=5) + elif value == 'Fast': + return gr.update(value=10) + else: + return gr.update(value=30) + + gen_mode.change(on_gen_mode_change, inputs=[gen_mode], outputs=[num_steps]) + + def on_decode_mode_change(value): + if value == 'Low': + return gr.update(value=196) + elif value == 'Standard': + return gr.update(value=256) + else: + return gr.update(value=384) + + decode_mode.change(on_decode_mode_change, inputs=[decode_mode], outputs=[octree_resolution]) + + def on_export_click(file_out, file_out2, file_type, reduce_face, export_texture, target_face_num): + if file_out is None: + raise gr.Error('Please generate a mesh first.') + + print(f'exporting {file_out}') + print(f'reduce face to {target_face_num}') + if export_texture: + mesh = trimesh.load(file_out2) + save_folder = gen_save_folder() + path = export_mesh(mesh, save_folder, textured=True, type=file_type) + + # for preview + save_folder = gen_save_folder() + _ = export_mesh(mesh, save_folder, textured=True) + model_viewer_html = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH, + textured=True) + else: + mesh = trimesh.load(file_out) + mesh = floater_remove_worker(mesh) + mesh = degenerate_face_remove_worker(mesh) + if reduce_face: + mesh = face_reduce_worker(mesh, target_face_num) + save_folder = gen_save_folder() + path = export_mesh(mesh, save_folder, textured=False, type=file_type) + + # for preview + save_folder = gen_save_folder() + _ = export_mesh(mesh, save_folder, textured=False) + model_viewer_html = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH, + textured=False) + print(f'export to {path}') + return model_viewer_html, 
gr.update(value=path, interactive=True) + + confirm_export.click( + lambda: gr.update(selected='export_mesh_panel'), + outputs=[tabs_output], + ).then( + on_export_click, + inputs=[file_out, file_out2, file_type, reduce_face, export_texture, target_face_num], + outputs=[html_export_mesh, file_export] ) return demo @@ -329,18 +656,40 @@ if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() - parser.add_argument('--port', type=int, default=8080) + parser.add_argument("--model_path", type=str, default='tencent/Hunyuan3D-2') + parser.add_argument("--subfolder", type=str, default='hunyuan3d-dit-v2-0') + parser.add_argument("--texgen_model_path", type=str, default='tencent/Hunyuan3D-2') + parser.add_argument('--port', type=int, default=7860) + parser.add_argument('--host', type=str, default='0.0.0.0') + parser.add_argument('--device', type=str, default='cuda') + parser.add_argument('--mc_algo', type=str, default='mc') parser.add_argument('--cache-path', type=str, default='gradio_cache') parser.add_argument('--enable_t23d', action='store_true') + parser.add_argument('--disable_tex', action='store_true') + parser.add_argument('--enable_flashvdm', action='store_true') + parser.add_argument('--compile', action='store_true') + parser.add_argument('--low_vram_mode', action='store_true') args = parser.parse_args() + args.enable_flashvdm = True + args.enable_t23d = False + SAVE_DIR = args.cache_path os.makedirs(SAVE_DIR, exist_ok=True) CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) - - HTML_OUTPUT_PLACEHOLDER = """ -
+ MV_MODE = 'mv' in args.model_path + TURBO_MODE = 'turbo' in args.subfolder + + HTML_HEIGHT = 690 if MV_MODE else 650 + HTML_WIDTH = 500 + HTML_OUTPUT_PLACEHOLDER = f""" +
+
+

Welcome to Hunyuan3D!

+

No mesh here.

+
+
""" INPUT_MESH_HTML = """ @@ -350,31 +699,56 @@ if __name__ == '__main__': """ example_is = get_example_img_list() example_ts = get_example_txt_list() + example_mvs = get_example_mv_list() - try: - from hy3dgen.texgen import Hunyuan3DPaintPipeline + SUPPORTED_FORMATS = ['glb', 'obj', 'ply', 'stl'] - texgen_worker = Hunyuan3DPaintPipeline.from_pretrained('tencent/Hunyuan3D-2') - HAS_TEXTUREGEN = True - except Exception as e: - print(e) - print("Failed to load texture generator.") - print('Please try to install requirements by following README.md') - HAS_TEXTUREGEN = False + HAS_TEXTUREGEN = False + if not args.disable_tex: + try: + from hy3dgen.texgen import Hunyuan3DPaintPipeline + + texgen_worker = Hunyuan3DPaintPipeline.from_pretrained(args.texgen_model_path) + if args.low_vram_mode: + texgen_worker.enable_model_cpu_offload() + # Not help much, ignore for now. + # if args.compile: + # texgen_worker.models['delight_model'].pipeline.unet.compile() + # texgen_worker.models['delight_model'].pipeline.vae.compile() + # texgen_worker.models['multiview_model'].pipeline.unet.compile() + # texgen_worker.models['multiview_model'].pipeline.vae.compile() + HAS_TEXTUREGEN = True + except Exception as e: + print(e) + print("Failed to load texture generator.") + print('Please try to install requirements by following README.md') + HAS_TEXTUREGEN = False - HAS_T2I = False + HAS_T2I = True if args.enable_t23d: from hy3dgen.text2image import HunyuanDiTPipeline - t2i_worker = HunyuanDiTPipeline('Tencent-Hunyuan--HunyuanDiT-v1.1-Diffusers-Distilled') + t2i_worker = HunyuanDiTPipeline('Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled') HAS_T2I = True - from hy3dgen.shapegen import FaceReducer, FloaterRemover, DegenerateFaceRemover, \ + from hy3dgen.shapegen import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier, \ Hunyuan3DDiTFlowMatchingPipeline + from hy3dgen.shapegen.pipelines import export_to_trimesh from hy3dgen.rembg import BackgroundRemover rmbg_worker = BackgroundRemover() - i23d_worker = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained('tencent/Hunyuan3D-2') + i23d_worker = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained( + args.model_path, + subfolder=args.subfolder, + use_safetensors=True, + device=args.device, + ) + if args.enable_flashvdm: + mc_algo = 'mc' if args.device in ['cpu', 'mps'] else args.mc_algo + i23d_worker.enable_flashvdm(mc_algo=mc_algo) + if args.compile: + i23d_worker.compile() + floater_remove_worker = FloaterRemover() degenerate_face_remove_worker = DegenerateFaceRemover() face_reduce_worker = FaceReducer() @@ -383,10 +757,13 @@ if __name__ == '__main__': # create a FastAPI app app = FastAPI() # create a static directory to store the static files - static_dir = Path('./gradio_cache') + static_dir = Path(SAVE_DIR).absolute() static_dir.mkdir(parents=True, exist_ok=True) - app.mount("/static", StaticFiles(directory=static_dir), name="static") + app.mount("/static", StaticFiles(directory=static_dir, html=True), name="static") + shutil.copytree('./assets/env_maps', os.path.join(static_dir, 'env_maps'), dirs_exist_ok=True) + if args.low_vram_mode: + torch.cuda.empty_cache() demo = build_app() app = gr.mount_gradio_app(app, demo, path="/") - uvicorn.run(app, host="0.0.0.0", port=args.port) + uvicorn.run(app, host=args.host, port=args.port) diff --git a/hy3dgen/__init__.py b/hy3dgen/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644 --- a/hy3dgen/__init__.py +++ b/hy3dgen/__init__.py @@ -1,13 +1,3 @@ -# 
Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -20,4 +10,4 @@ # their software and algorithms, including trained model weights, parameters (including # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, # fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. \ No newline at end of file diff --git a/hy3dgen/rembg.py b/hy3dgen/rembg.py index c0d99483c8354fc10c6689b5cf12ebcd44368d92..6247f060c9f325b1e267668baf236ec8e4c2dae9 100644 --- a/hy3dgen/rembg.py +++ b/hy3dgen/rembg.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -22,7 +12,6 @@ # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. - from PIL import Image from rembg import remove, new_session diff --git a/hy3dgen/shapegen/__init__.py b/hy3dgen/shapegen/__init__.py old mode 100644 new mode 100755 index d1f9534c15d029511d910d29e45da5ba7b8c8714..1b1f9cce42c1d2bef94bbb2d7b088d24e76fe01e --- a/hy3dgen/shapegen/__init__.py +++ b/hy3dgen/shapegen/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. 
# Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -23,5 +13,5 @@ # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. from .pipelines import Hunyuan3DDiTPipeline, Hunyuan3DDiTFlowMatchingPipeline -from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover +from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier from .preprocessors import ImageProcessorV2, IMAGE_PROCESSORS, DEFAULT_IMAGEPROCESSOR diff --git a/hy3dgen/shapegen/models/__init__.py b/hy3dgen/shapegen/models/__init__.py old mode 100644 new mode 100755 index 684b3e389737fb988f5e363e777c34f6cd1fe4ea..8179353ba7a5bdb8bcc30baa64e319fb8f884d57 --- a/hy3dgen/shapegen/models/__init__.py +++ b/hy3dgen/shapegen/models/__init__.py @@ -23,6 +23,6 @@ # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +from .autoencoders import ShapeVAE from .conditioner import DualImageEncoder, SingleImageEncoder, DinoImageEncoder, CLIPImageEncoder -from .hunyuan3ddit import Hunyuan3DDiT -from .vae import ShapeVAE +from .denoisers import Hunyuan3DDiT diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py b/hy3dgen/shapegen/models/autoencoders/__init__.py similarity index 61% rename from hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py rename to hy3dgen/shapegen/models/autoencoders/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..20bbf8d9559f8d5de7d7ae2d88bbb0348a197dc4 100644 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py +++ b/hy3dgen/shapegen/models/autoencoders/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -21,3 +11,10 @@ # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
+ +from .attention_blocks import CrossAttentionDecoder +from .attention_processors import FlashVDMCrossAttentionProcessor, CrossAttentionProcessor, \ + FlashVDMTopMCrossAttentionProcessor +from .model import ShapeVAE, VectsetVAE +from .surface_extractors import SurfaceExtractors, MCSurfaceExtractor, DMCSurfaceExtractor, Latent2MeshOutput +from .volume_decoders import HierarchicalVolumeDecoding, FlashVDMVolumeDecoding, VanillaVolumeDecoder diff --git a/hy3dgen/shapegen/models/vae.py b/hy3dgen/shapegen/models/autoencoders/attention_blocks.py similarity index 63% rename from hy3dgen/shapegen/models/vae.py rename to hy3dgen/shapegen/models/autoencoders/attention_blocks.py index aef2784ac0db653714e711d12697eafc962c2aa3..ab34eeb4eb8b19eb52e1ff188aaf785ad3fa96cb 100644 --- a/hy3dgen/shapegen/models/vae.py +++ b/hy3dgen/shapegen/models/autoencoders/attention_blocks.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -22,15 +12,25 @@ # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. -from typing import Tuple, List, Union, Optional -import numpy as np +import os +from typing import Optional + import torch import torch.nn as nn -import torch.nn.functional as F -from einops import rearrange, repeat -from skimage import measure -from tqdm import tqdm +from einops import rearrange + +from .attention_processors import CrossAttentionProcessor +from ...utils import logger + +scaled_dot_product_attention = nn.functional.scaled_dot_product_attention + +if os.environ.get('USE_SAGEATTN', '0') == '1': + try: + from sageattention import sageattn + except ImportError: + raise ImportError('Please install the package "sageattention" to use this USE_SAGEATTN.') + scaled_dot_product_attention = sageattn class FourierEmbedder(nn.Module): @@ -166,13 +166,14 @@ class MLP(nn.Module): def __init__( self, *, width: int, + expand_ratio: int = 4, output_width: int = None, drop_path_rate: float = 0.0 ): super().__init__() self.width = width - self.c_fc = nn.Linear(width, width * 4) - self.c_proj = nn.Linear(width * 4, output_width if output_width is not None else width) + self.c_fc = nn.Linear(width, width * expand_ratio) + self.c_proj = nn.Linear(width * expand_ratio, output_width if output_width is not None else width) self.gelu = nn.GELU() self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. 
else nn.Identity() @@ -196,6 +197,8 @@ class QKVMultiheadCrossAttention(nn.Module): self.q_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity() self.k_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity() + self.attn_processor = CrossAttentionProcessor() + def forward(self, q, kv): _, n_ctx, _ = q.shape bs, n_data, width = kv.shape @@ -206,10 +209,9 @@ class QKVMultiheadCrossAttention(nn.Module): q = self.q_norm(q) k = self.k_norm(k) - q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v)) - out = F.scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1) - + out = self.attn_processor(self, q, k, v) + out = out.transpose(1, 2).reshape(bs, n_ctx, -1) return out @@ -223,7 +225,8 @@ class MultiheadCrossAttention(nn.Module): n_data: Optional[int] = None, data_width: Optional[int] = None, norm_layer=nn.LayerNorm, - qk_norm: bool = False + qk_norm: bool = False, + kv_cache: bool = False, ): super().__init__() self.n_data = n_data @@ -240,10 +243,18 @@ class MultiheadCrossAttention(nn.Module): norm_layer=norm_layer, qk_norm=qk_norm ) + self.kv_cache = kv_cache + self.data = None def forward(self, x, data): x = self.c_q(x) - data = self.c_kv(data) + if self.kv_cache: + if self.data is None: + self.data = self.c_kv(data) + logger.info('Saving kv cache; this should be called only once per mesh') + data = self.data + else: + data = self.c_kv(data) x = self.attention(x, data) x = self.c_proj(x) return x @@ -256,6 +267,7 @@ class ResidualCrossAttentionBlock(nn.Module): n_data: Optional[int] = None, width: int, heads: int, + mlp_expand_ratio: int = 4, data_width: Optional[int] = None, qkv_bias: bool = True, norm_layer=nn.LayerNorm, @@ -278,7 +290,7 @@ class ResidualCrossAttentionBlock(nn.Module): self.ln_1 = norm_layer(width, elementwise_affine=True, eps=1e-6) self.ln_2 = norm_layer(data_width, elementwise_affine=True, eps=1e-6) self.ln_3 = norm_layer(width, elementwise_affine=True, eps=1e-6) - self.mlp = MLP(width=width) + self.mlp = MLP(width=width, expand_ratio=mlp_expand_ratio) def forward(self, x: torch.Tensor, data: torch.Tensor): x = x + self.attn(self.ln_1(x), self.ln_2(data)) @@ -312,7 +324,7 @@ class QKVMultiheadAttention(nn.Module): k = self.k_norm(k) q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v)) - out = F.scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1) + out = scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1) return out @@ -430,207 +442,52 @@ class CrossAttentionDecoder(nn.Module): fourier_embedder: FourierEmbedder, width: int, heads: int, + mlp_expand_ratio: int = 4, + downsample_ratio: int = 1, + enable_ln_post: bool = True, qkv_bias: bool = True, qk_norm: bool = False, label_type: str = "binary" ): super().__init__() + self.enable_ln_post = enable_ln_post self.fourier_embedder = fourier_embedder - + self.downsample_ratio = downsample_ratio self.query_proj = nn.Linear(self.fourier_embedder.out_dim, width) - + if self.downsample_ratio != 1: + self.latents_proj = nn.Linear(width * downsample_ratio, width) + if not self.enable_ln_post: + qk_norm = False self.cross_attn_decoder = ResidualCrossAttentionBlock( n_data=num_latents, width=width, + mlp_expand_ratio=mlp_expand_ratio, heads=heads, qkv_bias=qkv_bias, qk_norm=qk_norm ) - self.ln_post = nn.LayerNorm(width) + if self.enable_ln_post: + self.ln_post = nn.LayerNorm(width) self.output_proj =
nn.Linear(width, out_channels) self.label_type = label_type - - def forward(self, queries: torch.FloatTensor, latents: torch.FloatTensor): - queries = self.query_proj(self.fourier_embedder(queries).to(latents.dtype)) - x = self.cross_attn_decoder(queries, latents) - x = self.ln_post(x) + self.count = 0 + + def set_cross_attention_processor(self, processor): + self.cross_attn_decoder.attn.attention.attn_processor = processor + + def set_default_cross_attention_processor(self): + self.cross_attn_decoder.attn.attention.attn_processor = CrossAttentionProcessor() + + def forward(self, queries=None, query_embeddings=None, latents=None): + if query_embeddings is None: + query_embeddings = self.query_proj(self.fourier_embedder(queries).to(latents.dtype)) + self.count += query_embeddings.shape[1] + if self.downsample_ratio != 1: + latents = self.latents_proj(latents) + x = self.cross_attn_decoder(query_embeddings, latents) + if self.enable_ln_post: + x = self.ln_post(x) occ = self.output_proj(x) return occ - - -def generate_dense_grid_points(bbox_min: np.ndarray, - bbox_max: np.ndarray, - octree_depth: int, - indexing: str = "ij", - octree_resolution: int = None, - ): - length = bbox_max - bbox_min - num_cells = np.exp2(octree_depth) - if octree_resolution is not None: - num_cells = octree_resolution - - x = np.linspace(bbox_min[0], bbox_max[0], int(num_cells) + 1, dtype=np.float32) - y = np.linspace(bbox_min[1], bbox_max[1], int(num_cells) + 1, dtype=np.float32) - z = np.linspace(bbox_min[2], bbox_max[2], int(num_cells) + 1, dtype=np.float32) - [xs, ys, zs] = np.meshgrid(x, y, z, indexing=indexing) - xyz = np.stack((xs, ys, zs), axis=-1) - xyz = xyz.reshape(-1, 3) - grid_size = [int(num_cells) + 1, int(num_cells) + 1, int(num_cells) + 1] - - return xyz, grid_size, length - - -def center_vertices(vertices): - """Translate the vertices so that bounding box is centered at zero.""" - vert_min = vertices.min(dim=0)[0] - vert_max = vertices.max(dim=0)[0] - vert_center = 0.5 * (vert_min + vert_max) - return vertices - vert_center - - -class Latent2MeshOutput: - - def __init__(self, mesh_v=None, mesh_f=None): - self.mesh_v = mesh_v - self.mesh_f = mesh_f - - -class ShapeVAE(nn.Module): - def __init__( - self, - *, - num_latents: int, - embed_dim: int, - width: int, - heads: int, - num_decoder_layers: int, - num_freqs: int = 8, - include_pi: bool = True, - qkv_bias: bool = True, - qk_norm: bool = False, - label_type: str = "binary", - drop_path_rate: float = 0.0, - scale_factor: float = 1.0, - ): - super().__init__() - self.fourier_embedder = FourierEmbedder(num_freqs=num_freqs, include_pi=include_pi) - - self.post_kl = nn.Linear(embed_dim, width) - - self.transformer = Transformer( - n_ctx=num_latents, - width=width, - layers=num_decoder_layers, - heads=heads, - qkv_bias=qkv_bias, - qk_norm=qk_norm, - drop_path_rate=drop_path_rate - ) - - self.geo_decoder = CrossAttentionDecoder( - fourier_embedder=self.fourier_embedder, - out_channels=1, - num_latents=num_latents, - width=width, - heads=heads, - qkv_bias=qkv_bias, - qk_norm=qk_norm, - label_type=label_type, - ) - - self.scale_factor = scale_factor - self.latent_shape = (num_latents, embed_dim) - - def forward(self, latents): - latents = self.post_kl(latents) - latents = self.transformer(latents) - return latents - - @torch.no_grad() - def latents2mesh( - self, - latents: torch.FloatTensor, - bounds: Union[Tuple[float], List[float], float] = 1.1, - octree_depth: int = 7, - num_chunks: int = 10000, - mc_level: float = -1 / 512, - octree_resolution: int =
None, - mc_algo: str = 'dmc', - ): - device = latents.device - - # 1. generate query points - if isinstance(bounds, float): - bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] - bbox_min = np.array(bounds[0:3]) - bbox_max = np.array(bounds[3:6]) - bbox_size = bbox_max - bbox_min - xyz_samples, grid_size, length = generate_dense_grid_points( - bbox_min=bbox_min, - bbox_max=bbox_max, - octree_depth=octree_depth, - octree_resolution=octree_resolution, - indexing="ij" - ) - xyz_samples = torch.FloatTensor(xyz_samples) - - # 2. latents to 3d volume - batch_logits = [] - batch_size = latents.shape[0] - for start in tqdm(range(0, xyz_samples.shape[0], num_chunks), - desc=f"MC Level {mc_level} Implicit Function:"): - queries = xyz_samples[start: start + num_chunks, :].to(device) - queries = queries.half() - batch_queries = repeat(queries, "p c -> b p c", b=batch_size) - - logits = self.geo_decoder(batch_queries.to(latents.dtype), latents) - if mc_level == -1: - mc_level = 0 - logits = torch.sigmoid(logits) * 2 - 1 - print(f'Training with soft labels, inference with sigmoid and marching cubes level 0.') - batch_logits.append(logits) - grid_logits = torch.cat(batch_logits, dim=1) - grid_logits = grid_logits.view((batch_size, grid_size[0], grid_size[1], grid_size[2])).float() - - # 3. extract surface - outputs = [] - for i in range(batch_size): - try: - if mc_algo == 'mc': - vertices, faces, normals, _ = measure.marching_cubes( - grid_logits[i].cpu().numpy(), - mc_level, - method="lewiner" - ) - vertices = vertices / grid_size * bbox_size + bbox_min - elif mc_algo == 'dmc': - if not hasattr(self, 'dmc'): - try: - from diso import DiffDMC - except: - raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'") - self.dmc = DiffDMC(dtype=torch.float32).to(device) - octree_resolution = 2 ** octree_depth if octree_resolution is None else octree_resolution - sdf = -grid_logits[i] / octree_resolution - verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True) - verts = center_vertices(verts) - vertices = verts.detach().cpu().numpy() - faces = faces.detach().cpu().numpy()[:, ::-1] - else: - raise ValueError(f"mc_algo {mc_algo} not supported.") - - outputs.append( - Latent2MeshOutput( - mesh_v=vertices.astype(np.float32), - mesh_f=np.ascontiguousarray(faces) - ) - ) - - except ValueError: - outputs.append(None) - except RuntimeError: - outputs.append(None) - - return outputs diff --git a/hy3dgen/shapegen/models/autoencoders/attention_processors.py b/hy3dgen/shapegen/models/autoencoders/attention_processors.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b232eb0d16d67b9598a7e49f57d6616f2e5bed --- /dev/null +++ b/hy3dgen/shapegen/models/autoencoders/attention_processors.py @@ -0,0 +1,96 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations.
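Both attention_blocks.py (USE_SAGEATTN, above) and the new attention_processors.py (CA_USE_SAGEATTN, below) substitute SageAttention for torch.nn.functional.scaled_dot_product_attention at import time. A hedged opt-in sketch, assuming the optional sageattention package is installed:

    import os
    # The flags are read at module import time, so set them before importing hy3dgen.
    os.environ['USE_SAGEATTN'] = '1'      # attention blocks and the DiT denoiser
    os.environ['CA_USE_SAGEATTN'] = '1'   # cross-attention processors (this file)
    import hy3dgen.shapegen  # raises ImportError if sageattention is not installed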
+ +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +import os + +import torch +import torch.nn.functional as F + +scaled_dot_product_attention = F.scaled_dot_product_attention +if os.environ.get('CA_USE_SAGEATTN', '0') == '1': + try: + from sageattention import sageattn + except ImportError: + raise ImportError('Please install the package "sageattention" to use CA_USE_SAGEATTN.') + scaled_dot_product_attention = sageattn + + +class CrossAttentionProcessor: + def __call__(self, attn, q, k, v): + out = scaled_dot_product_attention(q, k, v) + return out + + +class FlashVDMCrossAttentionProcessor: + def __init__(self, topk=None): + self.topk = topk + + def __call__(self, attn, q, k, v): + if k.shape[-2] == 3072: + topk = 1024 + elif k.shape[-2] == 512: + topk = 256 + else: + topk = k.shape[-2] // 3 + + if self.topk is True: + q1 = q[:, :, ::100, :] + sim = q1 @ k.transpose(-1, -2) + sim = torch.mean(sim, -2) + topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1) + topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1]) + v0 = torch.gather(v, dim=-2, index=topk_ind) + k0 = torch.gather(k, dim=-2, index=topk_ind) + out = scaled_dot_product_attention(q, k0, v0) + elif self.topk is False: + out = scaled_dot_product_attention(q, k, v) + else: + idx, counts = self.topk + start = 0 + outs = [] + for grid_coord, count in zip(idx, counts): + end = start + count + q_chunk = q[:, :, start:end, :] + k0, v0 = self.select_topkv(q_chunk, k, v, topk) + out = scaled_dot_product_attention(q_chunk, k0, v0) + outs.append(out) + start += count + out = torch.cat(outs, dim=-2) + self.topk = False + return out + + def select_topkv(self, q_chunk, k, v, topk): + q1 = q_chunk[:, :, ::50, :] + sim = q1 @ k.transpose(-1, -2) + sim = torch.mean(sim, -2) + topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1) + topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1]) + v0 = torch.gather(v, dim=-2, index=topk_ind) + k0 = torch.gather(k, dim=-2, index=topk_ind) + return k0, v0 + + +class FlashVDMTopMCrossAttentionProcessor(FlashVDMCrossAttentionProcessor): + def select_topkv(self, q_chunk, k, v, topk): + q1 = q_chunk[:, :, ::30, :] + sim = q1 @ k.transpose(-1, -2) + # sim = sim.to(torch.float32) + sim = sim.softmax(-1) + sim = torch.mean(sim, 1) + activated_token = torch.where(sim > 1e-6)[2] + index = torch.unique(activated_token, return_counts=True)[0].unsqueeze(0).unsqueeze(0).unsqueeze(-1) + index = index.expand(-1, v.shape[1], -1, v.shape[-1]) + v0 = torch.gather(v, dim=-2, index=index) + k0 = torch.gather(k, dim=-2, index=index) + return k0, v0 diff --git a/hy3dgen/shapegen/models/autoencoders/model.py b/hy3dgen/shapegen/models/autoencoders/model.py new file mode 100644 index 0000000000000000000000000000000000000000..76f78da2445470f3614b3cd9d75a3133bafc3a4e --- /dev/null +++ b/hy3dgen/shapegen/models/autoencoders/model.py @@ -0,0 +1,189 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below.
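The FlashVDM processor above is driven through its topk field: True selects a top-k key/value subset from a strided query probe, False falls back to plain scaled dot-product attention, and an (indices, counts) pair triggers per-grid-chunk selection (see FlashVDMVolumeDecoding later in this patch). A self-contained shape check with dummy tensors; all sizes are illustrative:

    import torch
    from hy3dgen.shapegen.models.autoencoders.attention_processors import FlashVDMCrossAttentionProcessor

    proc = FlashVDMCrossAttentionProcessor()
    q = torch.randn(1, 8, 4000, 64)   # (batch, heads, query points, head_dim)
    k = torch.randn(1, 8, 3072, 64)   # 3072 latent tokens -> the processor keeps the top 1024
    v = torch.randn(1, 8, 3072, 64)
    proc.topk = True                  # coarse pass: rank keys by mean similarity to every 100th query
    out = proc(None, q, k, v)         # the `attn` argument is accepted but unused by this processor
    assert out.shape == q.shape       # proc.topk is reset to False after the call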
+# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +import os + +import torch +import torch.nn as nn +import yaml + +from .attention_blocks import FourierEmbedder, Transformer, CrossAttentionDecoder +from .surface_extractors import MCSurfaceExtractor, SurfaceExtractors +from .volume_decoders import VanillaVolumeDecoder, FlashVDMVolumeDecoding, HierarchicalVolumeDecoding +from ...utils import logger, synchronize_timer, smart_load_model + + +class VectsetVAE(nn.Module): + + @classmethod + @synchronize_timer('VectsetVAE Model Loading') + def from_single_file( + cls, + ckpt_path, + config_path, + device='cuda', + dtype=torch.float16, + use_safetensors=None, + **kwargs, + ): + # load config + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + + # load ckpt + if use_safetensors: + ckpt_path = ckpt_path.replace('.ckpt', '.safetensors') + if not os.path.exists(ckpt_path): + raise FileNotFoundError(f"Model file {ckpt_path} not found") + + logger.info(f"Loading model from {ckpt_path}") + if use_safetensors: + import safetensors.torch + ckpt = safetensors.torch.load_file(ckpt_path, device='cpu') + else: + ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True) + + model_kwargs = config['params'] + model_kwargs.update(kwargs) + + model = cls(**model_kwargs) + model.load_state_dict(ckpt) + model.to(device=device, dtype=dtype) + return model + + @classmethod + def from_pretrained( + cls, + model_path, + device='cuda', + dtype=torch.float16, + use_safetensors=True, + variant='fp16', + subfolder='hunyuan3d-vae-v2-0', + **kwargs, + ): + config_path, ckpt_path = smart_load_model( + model_path, + subfolder=subfolder, + use_safetensors=use_safetensors, + variant=variant + ) + + return cls.from_single_file( + ckpt_path, + config_path, + device=device, + dtype=dtype, + use_safetensors=use_safetensors, + **kwargs + ) + + def __init__( + self, + volume_decoder=None, + surface_extractor=None + ): + super().__init__() + if volume_decoder is None: + volume_decoder = VanillaVolumeDecoder() + if surface_extractor is None: + surface_extractor = MCSurfaceExtractor() + self.volume_decoder = volume_decoder + self.surface_extractor = surface_extractor + + def latents2mesh(self, latents: torch.FloatTensor, **kwargs): + with synchronize_timer('Volume decoding'): + grid_logits = self.volume_decoder(latents, self.geo_decoder, **kwargs) + with synchronize_timer('Surface extraction'): + outputs = self.surface_extractor(grid_logits, **kwargs) + return outputs + + def enable_flashvdm_decoder( + self, + enabled: bool = True, + adaptive_kv_selection=True, + topk_mode='mean', + mc_algo='dmc', + ): + if enabled: + if adaptive_kv_selection: + self.volume_decoder = FlashVDMVolumeDecoding(topk_mode) + else: + self.volume_decoder = HierarchicalVolumeDecoding() + if mc_algo not in
SurfaceExtractors.keys(): + raise ValueError(f'Unsupported mc_algo {mc_algo}, available: {list(SurfaceExtractors.keys())}') + self.surface_extractor = SurfaceExtractors[mc_algo]() + else: + self.volume_decoder = VanillaVolumeDecoder() + self.surface_extractor = MCSurfaceExtractor() + + +class ShapeVAE(VectsetVAE): + def __init__( + self, + *, + num_latents: int, + embed_dim: int, + width: int, + heads: int, + num_decoder_layers: int, + geo_decoder_downsample_ratio: int = 1, + geo_decoder_mlp_expand_ratio: int = 4, + geo_decoder_ln_post: bool = True, + num_freqs: int = 8, + include_pi: bool = True, + qkv_bias: bool = True, + qk_norm: bool = False, + label_type: str = "binary", + drop_path_rate: float = 0.0, + scale_factor: float = 1.0, + ): + super().__init__() + self.geo_decoder_ln_post = geo_decoder_ln_post + + self.fourier_embedder = FourierEmbedder(num_freqs=num_freqs, include_pi=include_pi) + + self.post_kl = nn.Linear(embed_dim, width) + + self.transformer = Transformer( + n_ctx=num_latents, + width=width, + layers=num_decoder_layers, + heads=heads, + qkv_bias=qkv_bias, + qk_norm=qk_norm, + drop_path_rate=drop_path_rate + ) + + self.geo_decoder = CrossAttentionDecoder( + fourier_embedder=self.fourier_embedder, + out_channels=1, + num_latents=num_latents, + mlp_expand_ratio=geo_decoder_mlp_expand_ratio, + downsample_ratio=geo_decoder_downsample_ratio, + enable_ln_post=self.geo_decoder_ln_post, + width=width // geo_decoder_downsample_ratio, + heads=heads // geo_decoder_downsample_ratio, + qkv_bias=qkv_bias, + qk_norm=qk_norm, + label_type=label_type, + ) + + self.scale_factor = scale_factor + self.latent_shape = (num_latents, embed_dim) + + def forward(self, latents): + latents = self.post_kl(latents) + latents = self.transformer(latents) + return latents diff --git a/hy3dgen/shapegen/models/autoencoders/surface_extractors.py b/hy3dgen/shapegen/models/autoencoders/surface_extractors.py new file mode 100644 index 0000000000000000000000000000000000000000..f4d8f63ab2cc56ddd2171f299519b226d77f2eba --- /dev/null +++ b/hy3dgen/shapegen/models/autoencoders/surface_extractors.py @@ -0,0 +1,100 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
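VectsetVAE gives the VAE the same from_pretrained/from_single_file loading surface as the pipeline, and ShapeVAE wires the new geo_decoder knobs through to CrossAttentionDecoder. A hedged usage sketch: it assumes a CUDA device, access to the tencent/Hunyuan3D-2 weights, and diso for the 'dmc' extractor; the random latents merely stand in for a DiT sample:

    import torch
    from hy3dgen.shapegen.models.autoencoders import ShapeVAE

    vae = ShapeVAE.from_pretrained('tencent/Hunyuan3D-2')   # config + fp16 safetensors via smart_load_model
    vae.enable_flashvdm_decoder(enabled=True, topk_mode='mean', mc_algo='dmc')
    latents = torch.randn(1, *vae.latent_shape, dtype=torch.float16, device='cuda')
    latents = vae(latents)                                  # post_kl + transformer
    meshes = vae.latents2mesh(latents, octree_resolution=256, mc_level=0.0, bounds=1.01, num_chunks=8000)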
+ +from typing import Union, Tuple, List + +import numpy as np +import torch +from skimage import measure + + +class Latent2MeshOutput: + + def __init__(self, mesh_v=None, mesh_f=None): + self.mesh_v = mesh_v + self.mesh_f = mesh_f + + +def center_vertices(vertices): + """Translate the vertices so that bounding box is centered at zero.""" + vert_min = vertices.min(dim=0)[0] + vert_max = vertices.max(dim=0)[0] + vert_center = 0.5 * (vert_min + vert_max) + return vertices - vert_center + + +class SurfaceExtractor: + def _compute_box_stat(self, bounds: Union[Tuple[float], List[float], float], octree_resolution: int): + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + + bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6]) + bbox_size = bbox_max - bbox_min + grid_size = [int(octree_resolution) + 1, int(octree_resolution) + 1, int(octree_resolution) + 1] + return grid_size, bbox_min, bbox_size + + def run(self, *args, **kwargs): + raise NotImplementedError + + def __call__(self, grid_logits, **kwargs): + outputs = [] + for i in range(grid_logits.shape[0]): + try: + vertices, faces = self.run(grid_logits[i], **kwargs) + vertices = vertices.astype(np.float32) + faces = np.ascontiguousarray(faces) + outputs.append(Latent2MeshOutput(mesh_v=vertices, mesh_f=faces)) + + except Exception: + import traceback + traceback.print_exc() + outputs.append(None) + + return outputs + + +class MCSurfaceExtractor(SurfaceExtractor): + def run(self, grid_logit, *, mc_level, bounds, octree_resolution, **kwargs): + vertices, faces, normals, _ = measure.marching_cubes( + grid_logit.cpu().numpy(), + mc_level, + method="lewiner" + ) + grid_size, bbox_min, bbox_size = self._compute_box_stat(bounds, octree_resolution) + vertices = vertices / grid_size * bbox_size + bbox_min + return vertices, faces + + +class DMCSurfaceExtractor(SurfaceExtractor): + def run(self, grid_logit, *, octree_resolution, **kwargs): + device = grid_logit.device + if not hasattr(self, 'dmc'): + try: + from diso import DiffDMC + except ImportError: + raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'") + self.dmc = DiffDMC(dtype=torch.float32).to(device) + sdf = -grid_logit / octree_resolution + sdf = sdf.to(torch.float32).contiguous() + verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True) + verts = center_vertices(verts) + vertices = verts.detach().cpu().numpy() + faces = faces.detach().cpu().numpy()[:, ::-1] + return vertices, faces + + +SurfaceExtractors = { + 'mc': MCSurfaceExtractor, + 'dmc': DMCSurfaceExtractor, +} diff --git a/hy3dgen/shapegen/models/autoencoders/volume_decoders.py b/hy3dgen/shapegen/models/autoencoders/volume_decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..d7bfd84647786b43d7815c788931326a305c7dca --- /dev/null +++ b/hy3dgen/shapegen/models/autoencoders/volume_decoders.py @@ -0,0 +1,435 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations.
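The extractors above share a registry keyed by mc_algo, and each run() returns (vertices, faces) that __call__ wraps into Latent2MeshOutput. A small self-contained check on a synthetic sphere SDF (all sizes illustrative; 'dmc' would additionally need `pip install diso`):

    import torch
    from hy3dgen.shapegen.models.autoencoders.surface_extractors import SurfaceExtractors

    coords = torch.linspace(-1.01, 1.01, 65)
    grid = torch.stack(torch.meshgrid(coords, coords, coords, indexing='ij'), dim=-1)
    logits = (0.5 - grid.norm(dim=-1)).unsqueeze(0)    # positive inside a sphere of radius 0.5
    extractor = SurfaceExtractors['mc']()
    out = extractor(logits, mc_level=0.0, bounds=1.01, octree_resolution=64)[0]
    print(out.mesh_v.shape, out.mesh_f.shape)          # vertex and face arrays of the extracted mesh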
+ +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +from typing import Union, Tuple, List, Callable + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import repeat +from tqdm import tqdm + +from .attention_blocks import CrossAttentionDecoder +from .attention_processors import FlashVDMCrossAttentionProcessor, FlashVDMTopMCrossAttentionProcessor +from ...utils import logger + + +def extract_near_surface_volume_fn(input_tensor: torch.Tensor, alpha: float): + device = input_tensor.device + D = input_tensor.shape[0] + signed_val = 0.0 + + # apply the offset and mask out invalid values + val = input_tensor + alpha + valid_mask = val > -9000 # -9000 is assumed to mark invalid cells + + # neighbor-fetching helper (keeps dimensions consistent) + def get_neighbor(t, shift, axis): + """Shift along the given axis while keeping dimensions consistent.""" + if shift == 0: + return t.clone() + + # choose the padding axis (input is [D, D, D], corresponding to the z, y, x axes) + pad_dims = [0, 0, 0, 0, 0, 0] # format: [x_before, x_after, y_before, y_after, z_before, z_after] + + # set the padding according to the axis + if axis == 0: # x axis (last dimension) + pad_idx = 0 if shift > 0 else 1 + pad_dims[pad_idx] = abs(shift) + elif axis == 1: # y axis (middle dimension) + pad_idx = 2 if shift > 0 else 3 + pad_dims[pad_idx] = abs(shift) + elif axis == 2: # z axis (first dimension) + pad_idx = 4 if shift > 0 else 5 + pad_dims[pad_idx] = abs(shift) + + # pad (add batch and channel dims so the tensor fits F.pad) + padded = F.pad(t.unsqueeze(0).unsqueeze(0), pad_dims[::-1], mode='replicate') # reversed to match F.pad's order + + # build dynamic slice indices + slice_dims = [slice(None)] * 3 # initialize to full slices + if axis == 0: # x axis (dim=2) + if shift > 0: + slice_dims[0] = slice(shift, None) + else: + slice_dims[0] = slice(None, shift) + elif axis == 1: # y axis (dim=1) + if shift > 0: + slice_dims[1] = slice(shift, None) + else: + slice_dims[1] = slice(None, shift) + elif axis == 2: # z axis (dim=0) + if shift > 0: + slice_dims[2] = slice(shift, None) + else: + slice_dims[2] = slice(None, shift) + + # apply the slice and restore dimensions + padded = padded.squeeze(0).squeeze(0) + sliced = padded[slice_dims] + return sliced + + # fetch neighbors in every direction (dimensions stay consistent) + left = get_neighbor(val, 1, axis=0) # x direction + right = get_neighbor(val, -1, axis=0) + back = get_neighbor(val, 1, axis=1) # y direction + front = get_neighbor(val, -1, axis=1) + down = get_neighbor(val, 1, axis=2) # z direction + up = get_neighbor(val, -1, axis=2) + + # handle invalid values at the boundary (where keeps dimensions consistent) + def safe_where(neighbor): + return torch.where(neighbor > -9000, neighbor, val) + + left = safe_where(left) + right = safe_where(right) + back = safe_where(back) + front = safe_where(front) + down = safe_where(down) + up = safe_where(up) + + # compute sign consistency (cast to float32 for precision) + sign = torch.sign(val.to(torch.float32)) + neighbors_sign = torch.stack([ + torch.sign(left.to(torch.float32)), + torch.sign(right.to(torch.float32)), + torch.sign(back.to(torch.float32)), + torch.sign(front.to(torch.float32)), + torch.sign(down.to(torch.float32)), + torch.sign(up.to(torch.float32)) + ], dim=0) + + # check whether all signs agree + same_sign = torch.all(neighbors_sign == sign, dim=0) + + # build the final mask + mask = (~same_sign).to(torch.int32) + return mask * valid_mask.to(torch.int32) + + +def generate_dense_grid_points( + bbox_min: np.ndarray, + bbox_max: np.ndarray, + octree_resolution: int, + indexing: str = "ij", +): + length = bbox_max - bbox_min + num_cells = octree_resolution + + x = np.linspace(bbox_min[0], bbox_max[0],
int(num_cells) + 1, dtype=np.float32) + y = np.linspace(bbox_min[1], bbox_max[1], int(num_cells) + 1, dtype=np.float32) + z = np.linspace(bbox_min[2], bbox_max[2], int(num_cells) + 1, dtype=np.float32) + [xs, ys, zs] = np.meshgrid(x, y, z, indexing=indexing) + xyz = np.stack((xs, ys, zs), axis=-1) + grid_size = [int(num_cells) + 1, int(num_cells) + 1, int(num_cells) + 1] + + return xyz, grid_size, length + + +class VanillaVolumeDecoder: + @torch.no_grad() + def __call__( + self, + latents: torch.FloatTensor, + geo_decoder: Callable, + bounds: Union[Tuple[float], List[float], float] = 1.01, + num_chunks: int = 10000, + octree_resolution: int = None, + enable_pbar: bool = True, + **kwargs, + ): + device = latents.device + dtype = latents.dtype + batch_size = latents.shape[0] + + # 1. generate query points + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + + bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6]) + xyz_samples, grid_size, length = generate_dense_grid_points( + bbox_min=bbox_min, + bbox_max=bbox_max, + octree_resolution=octree_resolution, + indexing="ij" + ) + xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype).contiguous().reshape(-1, 3) + + # 2. latents to 3d volume + batch_logits = [] + for start in tqdm(range(0, xyz_samples.shape[0], num_chunks), desc=f"Volume Decoding", + disable=not enable_pbar): + chunk_queries = xyz_samples[start: start + num_chunks, :] + chunk_queries = repeat(chunk_queries, "p c -> b p c", b=batch_size) + logits = geo_decoder(queries=chunk_queries, latents=latents) + batch_logits.append(logits) + + grid_logits = torch.cat(batch_logits, dim=1) + grid_logits = grid_logits.view((batch_size, *grid_size)).float() + + return grid_logits + + +class HierarchicalVolumeDecoding: + @torch.no_grad() + def __call__( + self, + latents: torch.FloatTensor, + geo_decoder: Callable, + bounds: Union[Tuple[float], List[float], float] = 1.01, + num_chunks: int = 10000, + mc_level: float = 0.0, + octree_resolution: int = None, + min_resolution: int = 63, + enable_pbar: bool = True, + **kwargs, + ): + device = latents.device + dtype = latents.dtype + + resolutions = [] + if octree_resolution < min_resolution: + resolutions.append(octree_resolution) + while octree_resolution >= min_resolution: + resolutions.append(octree_resolution) + octree_resolution = octree_resolution // 2 + resolutions.reverse() + + # 1. generate query points + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + bbox_min = np.array(bounds[0:3]) + bbox_max = np.array(bounds[3:6]) + bbox_size = bbox_max - bbox_min + + xyz_samples, grid_size, length = generate_dense_grid_points( + bbox_min=bbox_min, + bbox_max=bbox_max, + octree_resolution=resolutions[0], + indexing="ij" + ) + + dilate = nn.Conv3d(1, 1, 3, padding=1, bias=False, device=device, dtype=dtype) + dilate.weight = torch.nn.Parameter(torch.ones(dilate.weight.shape, dtype=dtype, device=device)) + + grid_size = np.array(grid_size) + xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype).contiguous().reshape(-1, 3) + + # 2. 
latents to 3d volume + batch_logits = [] + batch_size = latents.shape[0] + for start in tqdm(range(0, xyz_samples.shape[0], num_chunks), + desc=f"Hierarchical Volume Decoding [r{resolutions[0] + 1}]"): + queries = xyz_samples[start: start + num_chunks, :] + batch_queries = repeat(queries, "p c -> b p c", b=batch_size) + logits = geo_decoder(queries=batch_queries, latents=latents) + batch_logits.append(logits) + + grid_logits = torch.cat(batch_logits, dim=1).view((batch_size, grid_size[0], grid_size[1], grid_size[2])) + + for octree_depth_now in resolutions[1:]: + grid_size = np.array([octree_depth_now + 1] * 3) + resolution = bbox_size / octree_depth_now + next_index = torch.zeros(tuple(grid_size), dtype=dtype, device=device) + next_logits = torch.full(next_index.shape, -10000., dtype=dtype, device=device) + curr_points = extract_near_surface_volume_fn(grid_logits.squeeze(0), mc_level) + curr_points += grid_logits.squeeze(0).abs() < 0.95 + + if octree_depth_now == resolutions[-1]: + expand_num = 0 + else: + expand_num = 1 + for i in range(expand_num): + curr_points = dilate(curr_points.unsqueeze(0).to(dtype)).squeeze(0) + (cidx_x, cidx_y, cidx_z) = torch.where(curr_points > 0) + next_index[cidx_x * 2, cidx_y * 2, cidx_z * 2] = 1 + for i in range(2 - expand_num): + next_index = dilate(next_index.unsqueeze(0)).squeeze(0) + nidx = torch.where(next_index > 0) + + next_points = torch.stack(nidx, dim=1) + next_points = (next_points * torch.tensor(resolution, dtype=next_points.dtype, device=device) + + torch.tensor(bbox_min, dtype=next_points.dtype, device=device)) + batch_logits = [] + for start in tqdm(range(0, next_points.shape[0], num_chunks), + desc=f"Hierarchical Volume Decoding [r{octree_depth_now + 1}]"): + queries = next_points[start: start + num_chunks, :] + batch_queries = repeat(queries, "p c -> b p c", b=batch_size) + logits = geo_decoder(queries=batch_queries.to(latents.dtype), latents=latents) + batch_logits.append(logits) + grid_logits = torch.cat(batch_logits, dim=1) + next_logits[nidx] = grid_logits[0, ..., 0] + grid_logits = next_logits.unsqueeze(0) + grid_logits[grid_logits == -10000.] = float('nan') + + return grid_logits + + +class FlashVDMVolumeDecoding: + def __init__(self, topk_mode='mean'): + if topk_mode not in ['mean', 'merge']: + raise ValueError(f'Unsupported topk_mode {topk_mode}, available: {["mean", "merge"]}') + + if topk_mode == 'mean': + self.processor = FlashVDMCrossAttentionProcessor() + else: + self.processor = FlashVDMTopMCrossAttentionProcessor() + + @torch.no_grad() + def __call__( + self, + latents: torch.FloatTensor, + geo_decoder: CrossAttentionDecoder, + bounds: Union[Tuple[float], List[float], float] = 1.01, + num_chunks: int = 10000, + mc_level: float = 0.0, + octree_resolution: int = None, + min_resolution: int = 63, + mini_grid_num: int = 4, + enable_pbar: bool = True, + **kwargs, + ): + processor = self.processor + geo_decoder.set_cross_attention_processor(processor) + + device = latents.device + dtype = latents.dtype + + resolutions = [] + if octree_resolution < min_resolution: + resolutions.append(octree_resolution) + while octree_resolution >= min_resolution: + resolutions.append(octree_resolution) + octree_resolution = octree_resolution // 2 + resolutions.reverse() + resolutions[0] = round(resolutions[0] / mini_grid_num) * mini_grid_num - 1 + for i, resolution in enumerate(resolutions[1:]): + resolutions[i + 1] = resolutions[0] * 2 ** (i + 1) + + logger.info(f"FlashVDMVolumeDecoding Resolution: {resolutions}") + + # 1. 
generate query points + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + bbox_min = np.array(bounds[0:3]) + bbox_max = np.array(bounds[3:6]) + bbox_size = bbox_max - bbox_min + + xyz_samples, grid_size, length = generate_dense_grid_points( + bbox_min=bbox_min, + bbox_max=bbox_max, + octree_resolution=resolutions[0], + indexing="ij" + ) + + dilate = nn.Conv3d(1, 1, 3, padding=1, bias=False, device=device, dtype=dtype) + dilate.weight = torch.nn.Parameter(torch.ones(dilate.weight.shape, dtype=dtype, device=device)) + + grid_size = np.array(grid_size) + + # 2. latents to 3d volume + xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype) + batch_size = latents.shape[0] + mini_grid_size = xyz_samples.shape[0] // mini_grid_num + xyz_samples = xyz_samples.view( + mini_grid_num, mini_grid_size, + mini_grid_num, mini_grid_size, + mini_grid_num, mini_grid_size, 3 + ).permute( + 0, 2, 4, 1, 3, 5, 6 + ).reshape( + -1, mini_grid_size * mini_grid_size * mini_grid_size, 3 + ) + batch_logits = [] + num_batchs = max(num_chunks // xyz_samples.shape[1], 1) + for start in tqdm(range(0, xyz_samples.shape[0], num_batchs), + desc=f"FlashVDM Volume Decoding", disable=not enable_pbar): + queries = xyz_samples[start: start + num_batchs, :] + batch = queries.shape[0] + batch_latents = repeat(latents.squeeze(0), "p c -> b p c", b=batch) + processor.topk = True + logits = geo_decoder(queries=queries, latents=batch_latents) + batch_logits.append(logits) + grid_logits = torch.cat(batch_logits, dim=0).reshape( + mini_grid_num, mini_grid_num, mini_grid_num, + mini_grid_size, mini_grid_size, + mini_grid_size + ).permute(0, 3, 1, 4, 2, 5).contiguous().view( + (batch_size, grid_size[0], grid_size[1], grid_size[2]) + ) + + for octree_depth_now in resolutions[1:]: + grid_size = np.array([octree_depth_now + 1] * 3) + resolution = bbox_size / octree_depth_now + next_index = torch.zeros(tuple(grid_size), dtype=dtype, device=device) + next_logits = torch.full(next_index.shape, -10000., dtype=dtype, device=device) + curr_points = extract_near_surface_volume_fn(grid_logits.squeeze(0), mc_level) + curr_points += grid_logits.squeeze(0).abs() < 0.95 + + if octree_depth_now == resolutions[-1]: + expand_num = 0 + else: + expand_num = 1 + for i in range(expand_num): + curr_points = dilate(curr_points.unsqueeze(0).to(dtype)).squeeze(0) + (cidx_x, cidx_y, cidx_z) = torch.where(curr_points > 0) + + next_index[cidx_x * 2, cidx_y * 2, cidx_z * 2] = 1 + for i in range(2 - expand_num): + next_index = dilate(next_index.unsqueeze(0)).squeeze(0) + nidx = torch.where(next_index > 0) + + next_points = torch.stack(nidx, dim=1) + next_points = (next_points * torch.tensor(resolution, dtype=torch.float32, device=device) + + torch.tensor(bbox_min, dtype=torch.float32, device=device)) + + query_grid_num = 6 + min_val = next_points.min(axis=0).values + max_val = next_points.max(axis=0).values + vol_queries_index = (next_points - min_val) / (max_val - min_val) * (query_grid_num - 0.001) + index = torch.floor(vol_queries_index).long() + index = index[..., 0] * (query_grid_num ** 2) + index[..., 1] * query_grid_num + index[..., 2] + index = index.sort() + next_points = next_points[index.indices].unsqueeze(0).contiguous() + unique_values = torch.unique(index.values, return_counts=True) + grid_logits = torch.zeros((next_points.shape[1]), dtype=latents.dtype, device=latents.device) + input_grid = [[], []] + logits_grid_list = [] + start_num = 0 + sum_num = 0 + for grid_index, count in 
zip(unique_values[0].cpu().tolist(), unique_values[1].cpu().tolist()): + if sum_num + count < num_chunks or sum_num == 0: + sum_num += count + input_grid[0].append(grid_index) + input_grid[1].append(count) + else: + processor.topk = input_grid + logits_grid = geo_decoder(queries=next_points[:, start_num:start_num + sum_num], latents=latents) + start_num = start_num + sum_num + logits_grid_list.append(logits_grid) + input_grid = [[grid_index], [count]] + sum_num = count + if sum_num > 0: + processor.topk = input_grid + logits_grid = geo_decoder(queries=next_points[:, start_num:start_num + sum_num], latents=latents) + logits_grid_list.append(logits_grid) + logits_grid = torch.cat(logits_grid_list, dim=1) + grid_logits[index.indices] = logits_grid.squeeze(0).squeeze(-1) + next_logits[nidx] = grid_logits + grid_logits = next_logits.unsqueeze(0) + + grid_logits[grid_logits == -10000.] = float('nan') + + return grid_logits diff --git a/hy3dgen/shapegen/models/conditioner.py b/hy3dgen/shapegen/models/conditioner.py old mode 100644 new mode 100755 index 1af4c0cc440a193167c0837621c3494242b95f3d..d0d848c3b0d82eba4e4453d2c266f8fa7a1aeaaa --- a/hy3dgen/shapegen/models/conditioner.py +++ b/hy3dgen/shapegen/models/conditioner.py @@ -22,6 +22,7 @@ # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +import numpy as np import torch import torch.nn as nn from torchvision import transforms @@ -33,6 +34,26 @@ from transformers import ( ) +def get_1d_sincos_pos_embed_from_grid(embed_dim, pos): + """ + embed_dim: output dimension for each position + pos: a list of positions to be encoded: size (M,) + out: (M, D) + """ + assert embed_dim % 2 == 0 + omega = np.arange(embed_dim // 2, dtype=np.float64) + omega /= embed_dim / 2. + omega = 1. 
/ 10000 ** omega # (D/2,) + + pos = pos.reshape(-1) # (M,) + out = np.einsum('m,d->md', pos, omega) # (M, D/2), outer product + + emb_sin = np.sin(out) # (M, D/2) + emb_cos = np.cos(out) # (M, D/2) + + return np.concatenate([emb_sin, emb_cos], axis=1) + + class ImageEncoder(nn.Module): def __init__( self, @@ -67,7 +88,7 @@ class ImageEncoder(nn.Module): ] ) - def forward(self, image, mask=None, value_range=(-1, 1)): + def forward(self, image, mask=None, value_range=(-1, 1), **kwargs): if value_range is not None: low, high = value_range image = (image - low) / (high - low) @@ -82,7 +103,7 @@ class ImageEncoder(nn.Module): return last_hidden_state - def unconditional_embedding(self, batch_size): + def unconditional_embedding(self, batch_size, **kwargs): device = next(self.model.parameters()).device dtype = next(self.model.parameters()).dtype zero = torch.zeros( @@ -110,11 +131,82 @@ class DinoImageEncoder(ImageEncoder): std = [0.229, 0.224, 0.225] +class DinoImageEncoderMV(DinoImageEncoder): + def __init__( + self, + version=None, + config=None, + use_cls_token=True, + image_size=224, + view_num=4, + **kwargs, + ): + super().__init__(version, config, use_cls_token, image_size, **kwargs) + self.view_num = view_num + self.num_patches = self.num_patches + pos = np.arange(self.view_num, dtype=np.float32) + view_embedding = torch.from_numpy( + get_1d_sincos_pos_embed_from_grid(self.model.config.hidden_size, pos)).float() + + view_embedding = view_embedding.unsqueeze(1).repeat(1, self.num_patches, 1) + self.view_embed = view_embedding.unsqueeze(0) + + def forward(self, image, mask=None, value_range=(-1, 1), view_idxs=None): + if value_range is not None: + low, high = value_range + image = (image - low) / (high - low) + + image = image.to(self.model.device, dtype=self.model.dtype) + + bs, num_views, c, h, w = image.shape + image = image.view(bs * num_views, c, h, w) + + inputs = self.transform(image) + outputs = self.model(inputs) + + last_hidden_state = outputs.last_hidden_state + last_hidden_state = last_hidden_state.view( + bs, num_views, last_hidden_state.shape[-2], + last_hidden_state.shape[-1] + ) + + view_embedding = self.view_embed.to(last_hidden_state.dtype).to(last_hidden_state.device) + if view_idxs is not None: + assert len(view_idxs) == bs + view_embeddings = [] + for i in range(bs): + view_idx = view_idxs[i] + assert num_views == len(view_idx) + view_embeddings.append(self.view_embed[:, view_idx, ...]) + view_embedding = torch.cat(view_embeddings, 0).to(last_hidden_state.dtype).to(last_hidden_state.device) + + if num_views != self.view_num: + view_embedding = view_embedding[:, :num_views, ...] 
+ last_hidden_state = last_hidden_state + view_embedding + last_hidden_state = last_hidden_state.view(bs, num_views * last_hidden_state.shape[-2], + last_hidden_state.shape[-1]) + return last_hidden_state + + def unconditional_embedding(self, batch_size, view_idxs=None, **kwargs): + device = next(self.model.parameters()).device + dtype = next(self.model.parameters()).dtype + zero = torch.zeros( + batch_size, + self.num_patches * len(view_idxs[0]), + self.model.config.hidden_size, + device=device, + dtype=dtype, + ) + return zero + + +def build_image_encoder(config): + if config['type'] == 'CLIPImageEncoder': + return CLIPImageEncoder(**config['kwargs']) + elif config['type'] == 'DinoImageEncoder': + return DinoImageEncoder(**config['kwargs']) + elif config['type'] == 'DinoImageEncoderMV': + return DinoImageEncoderMV(**config['kwargs']) + else: + raise ValueError(f'Unknown image encoder type: {config["type"]}') @@ -129,17 +221,17 @@ class DualImageEncoder(nn.Module): self.main_image_encoder = build_image_encoder(main_image_encoder) self.additional_image_encoder = build_image_encoder(additional_image_encoder) - def forward(self, image, mask=None): + def forward(self, image, mask=None, **kwargs): outputs = { - 'main': self.main_image_encoder(image, mask=mask), - 'additional': self.additional_image_encoder(image, mask=mask), + 'main': self.main_image_encoder(image, mask=mask, **kwargs), + 'additional': self.additional_image_encoder(image, mask=mask, **kwargs), } return outputs - def unconditional_embedding(self, batch_size): + def unconditional_embedding(self, batch_size, **kwargs): outputs = { - 'main': self.main_image_encoder.unconditional_embedding(batch_size), - 'additional': self.additional_image_encoder.unconditional_embedding(batch_size), + 'main': self.main_image_encoder.unconditional_embedding(batch_size, **kwargs), + 'additional': self.additional_image_encoder.unconditional_embedding(batch_size, **kwargs), } return outputs @@ -152,14 +244,14 @@ class SingleImageEncoder(nn.Module): super().__init__() self.main_image_encoder = build_image_encoder(main_image_encoder) - def forward(self, image, mask=None): + def forward(self, image, mask=None, **kwargs): outputs = { - 'main': self.main_image_encoder(image, mask=mask), + 'main': self.main_image_encoder(image, mask=mask, **kwargs), } return outputs - def unconditional_embedding(self, batch_size): + def unconditional_embedding(self, batch_size, **kwargs): outputs = { - 'main': self.main_image_encoder.unconditional_embedding(batch_size), + 'main': self.main_image_encoder.unconditional_embedding(batch_size, **kwargs), } return outputs diff --git a/hy3dgen/shapegen/models/denoisers/__init__.py b/hy3dgen/shapegen/models/denoisers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..72609333e27d68d377905ba9e67655de7021c31b --- /dev/null +++ b/hy3dgen/shapegen/models/denoisers/__init__.py @@ -0,0 +1,15 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations.
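get_1d_sincos_pos_embed_from_grid supplies the per-view positional code that DinoImageEncoderMV adds to every patch token of each view. A quick shape check; the hidden size 1024 is only a stand-in for the encoder's actual width:

    import numpy as np
    from hy3dgen.shapegen.models.conditioner import get_1d_sincos_pos_embed_from_grid

    pos = np.arange(4, dtype=np.float32)                # one slot per view, as in DinoImageEncoderMV
    emb = get_1d_sincos_pos_embed_from_grid(1024, pos)  # (4, 1024): sin half, then cos half
    assert emb.shape == (4, 1024)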
+ +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +from .hunyuan3ddit import Hunyuan3DDiT diff --git a/hy3dgen/shapegen/models/hunyuan3ddit.py b/hy3dgen/shapegen/models/denoisers/hunyuan3ddit.py old mode 100644 new mode 100755 similarity index 91% rename from hy3dgen/shapegen/models/hunyuan3ddit.py rename to hy3dgen/shapegen/models/denoisers/hunyuan3ddit.py index d1c778666890cb13538eba15460cf0c05c7f9130..7873f1660b40b860b3f87cd0b799ce29890f8620 --- a/hy3dgen/shapegen/models/hunyuan3ddit.py +++ b/hy3dgen/shapegen/models/denoisers/hunyuan3ddit.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -23,6 +13,7 @@ # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
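The guidance_embed flag added to Hunyuan3DDiT below folds a classifier-free-guidance scale into the conditioning vector for guidance-distilled checkpoints, reusing the sinusoidal timestep embedding. A sketch of just that step (the 5.0 scale is illustrative):

    import torch
    from hy3dgen.shapegen.models.denoisers.hunyuan3ddit import timestep_embedding

    guidance = torch.full((1,), 5.0)                 # CFG scale, one entry per batch element
    g_vec = timestep_embedding(guidance, 256, 1000)  # mirrors timestep_embedding(t, 256, self.time_factor)
    # in Hunyuan3DDiT.forward: vec = time_in(...) + guidance_in(g_vec) when guidance_embed=True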
import math +import os from dataclasses import dataclass from typing import List, Tuple, Optional @@ -30,9 +21,17 @@ import torch from einops import rearrange from torch import Tensor, nn +scaled_dot_product_attention = nn.functional.scaled_dot_product_attention +if os.environ.get('USE_SAGEATTN', '0') == '1': + try: + from sageattention import sageattn + except ImportError: + raise ImportError('Please install the package "sageattention" to use this USE_SAGEATTN.') + scaled_dot_product_attention = sageattn + def attention(q: Tensor, k: Tensor, v: Tensor, **kwargs) -> Tensor: - x = torch.nn.functional.scaled_dot_product_attention(q, k, v) + x = scaled_dot_product_attention(q, k, v) x = rearrange(x, "B H L D -> B L (H D)") return x @@ -61,6 +60,15 @@ def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 10 return embedding +class GELU(nn.Module): + def __init__(self, approximate='tanh'): + super().__init__() + self.approximate = approximate + + def forward(self, x: Tensor) -> Tensor: + return nn.functional.gelu(x.contiguous(), approximate=self.approximate) + + class MLPEmbedder(nn.Module): def __init__(self, in_dim: int, hidden_dim: int): super().__init__() @@ -163,7 +171,7 @@ class DoubleStreamBlock(nn.Module): self.img_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) self.img_mlp = nn.Sequential( nn.Linear(hidden_size, mlp_hidden_dim, bias=True), - nn.GELU(approximate="tanh"), + GELU(approximate="tanh"), nn.Linear(mlp_hidden_dim, hidden_size, bias=True), ) @@ -174,7 +182,7 @@ class DoubleStreamBlock(nn.Module): self.txt_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) self.txt_mlp = nn.Sequential( nn.Linear(hidden_size, mlp_hidden_dim, bias=True), - nn.GELU(approximate="tanh"), + GELU(approximate="tanh"), nn.Linear(mlp_hidden_dim, hidden_size, bias=True), ) @@ -240,7 +248,7 @@ class SingleStreamBlock(nn.Module): self.hidden_size = hidden_size self.pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) - self.mlp_act = nn.GELU(approximate="tanh") + self.mlp_act = GELU(approximate="tanh") self.modulation = Modulation(hidden_size, double=False) def forward(self, x: Tensor, vec: Tensor, pe: Tensor) -> Tensor: @@ -287,6 +295,7 @@ class Hunyuan3DDiT(nn.Module): theta: int = 10_000, qkv_bias: bool = True, time_factor: float = 1000, + guidance_embed: bool = False, ckpt_path: Optional[str] = None, **kwargs, ): @@ -303,6 +312,7 @@ class Hunyuan3DDiT(nn.Module): self.qkv_bias = qkv_bias self.time_factor = time_factor self.out_channels = self.in_channels + self.guidance_embed = guidance_embed if hidden_size % num_heads != 0: raise ValueError( @@ -316,6 +326,9 @@ class Hunyuan3DDiT(nn.Module): self.latent_in = nn.Linear(self.in_channels, self.hidden_size, bias=True) self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) self.cond_in = nn.Linear(context_in_dim, self.hidden_size) + self.guidance_in = ( + MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) if guidance_embed else nn.Identity() + ) self.double_blocks = nn.ModuleList( [ @@ -374,7 +387,14 @@ class Hunyuan3DDiT(nn.Module): ) -> Tensor: cond = contexts['main'] latent = self.latent_in(x) + vec = self.time_in(timestep_embedding(t, 256, self.time_factor).to(dtype=latent.dtype)) + if self.guidance_embed: + guidance = kwargs.get('guidance', None) + if guidance is None: + raise ValueError("Didn't get guidance strength for guidance distilled model.") + vec = vec + self.guidance_in(timestep_embedding(guidance, 256, self.time_factor)) + cond = 
self.cond_in(cond) pe = None diff --git a/hy3dgen/shapegen/pipelines.py b/hy3dgen/shapegen/pipelines.py old mode 100644 new mode 100755 index 5d535ea2b222e49a659c0fbbbfbe23f50cdb947e..ea265fa026ef386f7c41117fa8711a08b86acf80 --- a/hy3dgen/shapegen/pipelines.py +++ b/hy3dgen/shapegen/pipelines.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -25,7 +15,6 @@ import copy import importlib import inspect -import logging import os from typing import List, Optional, Union @@ -35,9 +24,12 @@ import trimesh import yaml from PIL import Image from diffusers.utils.torch_utils import randn_tensor +from diffusers.utils.import_utils import is_accelerate_version, is_accelerate_available from tqdm import tqdm -logger = logging.getLogger(__name__) +from .models.autoencoders import ShapeVAE +from .models.autoencoders import SurfaceExtractors +from .utils import logger, synchronize_timer, smart_load_model def retrieve_timesteps( @@ -99,6 +91,7 @@ def retrieve_timesteps( return timesteps, num_inference_steps +@synchronize_timer('Export to trimesh') def export_to_trimesh(mesh_output): if isinstance(mesh_output, list): outputs = [] @@ -135,7 +128,11 @@ def instantiate_from_config(config, **kwargs): class Hunyuan3DDiTPipeline: + model_cpu_offload_seq = "conditioner->model->vae" + _exclude_from_cpu_offload = [] + @classmethod + @synchronize_timer('Hunyuan3DDiTPipeline Model Loading') def from_single_file( cls, ckpt_path, @@ -168,7 +165,7 @@ class Hunyuan3DDiTPipeline: ckpt[model_name] = {} ckpt[model_name][new_key] = value else: - ckpt = torch.load(ckpt_path, map_location='cpu') + ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True) # load model model = instantiate_from_config(config['model']) model.load_state_dict(ckpt['model']) @@ -186,7 +183,6 @@ class Hunyuan3DDiTPipeline: scheduler=scheduler, conditioner=conditioner, image_processor=image_processor, - scheduler_cfg=config['scheduler'], device=device, dtype=dtype, ) @@ -200,37 +196,27 @@ class Hunyuan3DDiTPipeline: def from_pretrained( cls, model_path, - ckpt_name='model.ckpt', - config_name='config.yaml', device='cuda', dtype=torch.float16, - use_safetensors=None, + use_safetensors=True, + variant='fp16', + subfolder='hunyuan3d-dit-v2-0', **kwargs, ): - original_model_path = model_path - if not os.path.exists(model_path): - # try local path - base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen') - model_path = os.path.expanduser(os.path.join(base_dir, model_path, 'hunyuan3d-dit-v2-0')) - if not os.path.exists(model_path): - - try: - import huggingface_hub - path = snapshot_download( - repo_id=original_model_path, - allow_patterns=[f"hunyuan3d-dit-v2-0/*"], # 关键修改:模式匹配子文件夹 - ) - model_path = os.path.join(path, 'hunyuan3d-dit-v2-0') - except ImportError: - logger.warning( - "You 
need to install HuggingFace Hub to load models from the hub." - ) - raise RuntimeError(f"Model path {model_path} not found") - if not os.path.exists(model_path): - raise FileNotFoundError(f"Model path {original_model_path} not found") - - config_path = os.path.join(model_path, config_name) - ckpt_path = os.path.join(model_path, ckpt_name) + kwargs['from_pretrained_kwargs'] = dict( + model_path=model_path, + subfolder=subfolder, + use_safetensors=use_safetensors, + variant=variant, + dtype=dtype, + device=device, + ) + config_path, ckpt_path = smart_load_model( + model_path, + subfolder=subfolder, + use_safetensors=use_safetensors, + variant=variant + ) return cls.from_single_file( ckpt_path, config_path, @@ -257,27 +243,185 @@ class Hunyuan3DDiTPipeline: self.conditioner = conditioner self.image_processor = image_processor self.kwargs = kwargs - self.to(device, dtype) + def compile(self): + self.vae = torch.compile(self.vae) + self.model = torch.compile(self.model) + self.conditioner = torch.compile(self.conditioner) + + def enable_flashvdm( + self, + enabled: bool = True, + adaptive_kv_selection=True, + topk_mode='mean', + mc_algo='dmc', + replace_vae=True, + ): + if enabled: + model_path = self.kwargs['from_pretrained_kwargs']['model_path'] + turbo_vae_mapping = { + 'Hunyuan3D-2': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0-turbo'), + 'Hunyuan3D-2mv': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0-turbo'), + 'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini-turbo'), + } + model_name = model_path.split('/')[-1] + if replace_vae and model_name in turbo_vae_mapping: + model_path, subfolder = turbo_vae_mapping[model_name] + self.vae = ShapeVAE.from_pretrained( + model_path, subfolder=subfolder, + use_safetensors=self.kwargs['from_pretrained_kwargs']['use_safetensors'], + device=self.device, + ) + self.vae.enable_flashvdm_decoder( + enabled=enabled, + adaptive_kv_selection=adaptive_kv_selection, + topk_mode=topk_mode, + mc_algo=mc_algo + ) + else: + model_path = self.kwargs['from_pretrained_kwargs']['model_path'] + vae_mapping = { + 'Hunyuan3D-2': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0'), + 'Hunyuan3D-2mv': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0'), + 'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini'), + } + model_name = model_path.split('/')[-1] + if model_name in vae_mapping: + model_path, subfolder = vae_mapping[model_name] + self.vae = ShapeVAE.from_pretrained(model_path, subfolder=subfolder) + self.vae.enable_flashvdm_decoder(enabled=False) + def to(self, device=None, dtype=None): - if device is not None: - self.device = torch.device(device) - self.vae.to(device) - self.model.to(device) - self.conditioner.to(device) if dtype is not None: self.dtype = dtype self.vae.to(dtype=dtype) self.model.to(dtype=dtype) self.conditioner.to(dtype=dtype) + if device is not None: + self.device = torch.device(device) + self.vae.to(device) + self.model.to(device) + self.conditioner.to(device) + + @property + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. After calling + [`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from + Accelerate's module hooks. 
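Putting the new loading path and FlashVDM switch together, a hedged end-to-end sketch: the repo id, subfolder, and example image are illustrative, and the __call__ signature (image=...) is assumed from the repo's public usage rather than shown in this hunk:

    import torch
    from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline

    pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
        'tencent/Hunyuan3D-2',                 # resolved through smart_load_model
        subfolder='hunyuan3d-dit-v2-0', use_safetensors=True, variant='fp16',
    )
    pipeline.enable_flashvdm(topk_mode='mean', mc_algo='dmc')  # also swaps in the matching turbo VAE
    mesh = pipeline(image='assets/example_images/004.png')[0]  # trimesh output via export_to_trimesh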
+ """ + for name, model in self.components.items(): + if not isinstance(model, torch.nn.Module) or name in self._exclude_from_cpu_offload: + continue + + if not hasattr(model, "_hf_hook"): + return self.device + for module in model.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + + def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"): + r""" + Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared + to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` + method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with + `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. + + Arguments: + gpu_id (`int`, *optional*): + The ID of the accelerator that shall be used in inference. If not specified, it will default to 0. + device (`torch.Device` or `str`, *optional*, defaults to "cuda"): + The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will + default to "cuda". + """ + if self.model_cpu_offload_seq is None: + raise ValueError( + "Model CPU offload cannot be enabled because no `model_cpu_offload_seq` class attribute is set." + ) + + if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): + from accelerate import cpu_offload_with_hook + else: + raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") + + torch_device = torch.device(device) + device_index = torch_device.index + + if gpu_id is not None and device_index is not None: + raise ValueError( + f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}" + f"Cannot pass both. 
Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}" + ) + + # _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0 + self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0) + + device_type = torch_device.type + device = torch.device(f"{device_type}:{self._offload_gpu_id}") + + if self.device.type != "cpu": + self.to("cpu") + device_mod = getattr(torch, self.device.type, None) + if hasattr(device_mod, "empty_cache") and device_mod.is_available(): + device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist) + + all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)} + + self._all_hooks = [] + hook = None + for model_str in self.model_cpu_offload_seq.split("->"): + model = all_model_components.pop(model_str, None) + if not isinstance(model, torch.nn.Module): + continue - def encode_cond(self, image, mask, do_classifier_free_guidance, dual_guidance): + _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook) + self._all_hooks.append(hook) + + # CPU offload models that are not in the seq chain unless they are explicitly excluded + # these models will stay on CPU until maybe_free_model_hooks is called + # some models cannot be in the seq chain because they are iteratively called, such as controlnet + for name, model in all_model_components.items(): + if not isinstance(model, torch.nn.Module): + continue + + if name in self._exclude_from_cpu_offload: + model.to(device) + else: + _, hook = cpu_offload_with_hook(model, device) + self._all_hooks.append(hook) + + def maybe_free_model_hooks(self): + r""" + Function that offloads all components, removes all model hooks that were added when using + `enable_model_cpu_offload` and then applies them again. In case the model has not been offloaded this function + is a no-op. Make sure to add this function to the end of the `__call__` function of your pipeline so that it + functions correctly when applying enable_model_cpu_offload. 
+ """ + if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0: + # `enable_model_cpu_offload` has not be called, so silently do nothing + return + + for hook in self._all_hooks: + # offload model and remove hook from model + hook.offload() + hook.remove() + + # make sure the model is in the same state as before calling it + self.enable_model_cpu_offload() + + @synchronize_timer('Encode cond') + def encode_cond(self, image, additional_cond_inputs, do_classifier_free_guidance, dual_guidance): bsz = image.shape[0] - cond = self.conditioner(image=image, mask=mask) + cond = self.conditioner(image=image, **additional_cond_inputs) if do_classifier_free_guidance: - un_cond = self.conditioner.unconditional_embedding(bsz) + un_cond = self.conditioner.unconditional_embedding(bsz, **additional_cond_inputs) if dual_guidance: un_cond_drop_main = copy.deepcopy(un_cond) @@ -293,8 +437,6 @@ class Hunyuan3DDiTPipeline: cond = cat_recursive(cond, un_cond_drop_main, un_cond) else: - un_cond = self.conditioner.unconditional_embedding(bsz) - def cat_recursive(a, b): if isinstance(a, torch.Tensor): return torch.cat([a, b], dim=0).to(self.dtype) @@ -340,25 +482,27 @@ class Hunyuan3DDiTPipeline: latents = latents * getattr(self.scheduler, 'init_noise_sigma', 1.0) return latents - def prepare_image(self, image): + def prepare_image(self, image) -> dict: if isinstance(image, str) and not os.path.exists(image): raise FileNotFoundError(f"Couldn't find image at path {image}") if not isinstance(image, list): image = [image] - image_pts = [] - mask_pts = [] + + outputs = [] for img in image: - image_pt, mask_pt = self.image_processor(img, return_mask=True) - image_pts.append(image_pt) - mask_pts.append(mask_pt) + output = self.image_processor(img) + outputs.append(output) - image_pts = torch.cat(image_pts, dim=0).to(self.device, dtype=self.dtype) - if mask_pts[0] is not None: - mask_pts = torch.cat(mask_pts, dim=0).to(self.device, dtype=self.dtype) - else: - mask_pts = None - return image_pts, mask_pts + cond_input = {k: [] for k in outputs[0].keys()} + for output in outputs: + for key, value in output.items(): + cond_input[key].append(value) + for key, value in cond_input.items(): + if isinstance(value[0], torch.Tensor): + cond_input[key] = torch.cat(value, dim=0) + + return cond_input def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ @@ -388,6 +532,17 @@ class Hunyuan3DDiTPipeline: assert emb.shape == (w.shape[0], embedding_dim) return emb + def set_surface_extractor(self, mc_algo): + if mc_algo is None: + return + logger.info('The parameters `mc_algo` is deprecated, and will be removed in future versions.\n' + 'Please use: \n' + 'from hy3dgen.shapegen.models.autoencoders import SurfaceExtractors\n' + 'pipeline.vae.surface_extractor = SurfaceExtractors[mc_algo]() instead\n') + if mc_algo not in SurfaceExtractors.keys(): + raise ValueError(f"Unknown mc_algo {mc_algo}") + self.vae.surface_extractor = SurfaceExtractors[mc_algo]() + @torch.no_grad() def __call__( self, @@ -404,7 +559,7 @@ class Hunyuan3DDiTPipeline: octree_resolution=384, mc_level=-1 / 512, num_chunks=8000, - mc_algo='mc', + mc_algo=None, output_type: Optional[str] = "trimesh", enable_pbar=True, **kwargs, @@ -412,69 +567,72 @@ class Hunyuan3DDiTPipeline: callback = kwargs.pop("callback", None) callback_steps = kwargs.pop("callback_steps", None) + self.set_surface_extractor(mc_algo) + device = self.device dtype = self.dtype do_classifier_free_guidance = guidance_scale >= 0 and \ getattr(self.model, 
'guidance_cond_proj_dim', None) is None dual_guidance = dual_guidance_scale >= 0 and dual_guidance - image, mask = self.prepare_image(image) - cond = self.encode_cond(image=image, - mask=mask, - do_classifier_free_guidance=do_classifier_free_guidance, - dual_guidance=dual_guidance) + cond_inputs = self.prepare_image(image) + image = cond_inputs.pop('image') + cond = self.encode_cond( + image=image, + additional_cond_inputs=cond_inputs, + do_classifier_free_guidance=do_classifier_free_guidance, + dual_guidance=False, + ) batch_size = image.shape[0] t_dtype = torch.long - scheduler = instantiate_from_config(self.kwargs['scheduler_cfg']) timesteps, num_inference_steps = retrieve_timesteps( - scheduler, num_inference_steps, device, timesteps, sigmas - ) + self.scheduler, num_inference_steps, device, timesteps, sigmas) latents = self.prepare_latents(batch_size, dtype, device, generator) extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) guidance_cond = None if getattr(self.model, 'guidance_cond_proj_dim', None) is not None: - print('Using lcm guidance scale') + logger.info('Using lcm guidance scale') guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(batch_size) guidance_cond = self.get_guidance_scale_embedding( guidance_scale_tensor, embedding_dim=self.model.guidance_cond_proj_dim ).to(device=device, dtype=latents.dtype) - - for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:", leave=False)): - # expand the latents if we are doing classifier free guidance - if do_classifier_free_guidance: - latent_model_input = torch.cat([latents] * (3 if dual_guidance else 2)) - else: - latent_model_input = latents - latent_model_input = scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - timestep_tensor = torch.tensor([t], dtype=t_dtype, device=device) - timestep_tensor = timestep_tensor.expand(latent_model_input.shape[0]) - noise_pred = self.model(latent_model_input, timestep_tensor, cond, guidance_cond=guidance_cond) - - # no drop, drop clip, all drop - if do_classifier_free_guidance: - if dual_guidance: - noise_pred_clip, noise_pred_dino, noise_pred_uncond = noise_pred.chunk(3) - noise_pred = ( - noise_pred_uncond - + guidance_scale * (noise_pred_clip - noise_pred_dino) - + dual_guidance_scale * (noise_pred_dino - noise_pred_uncond) - ) + with synchronize_timer('Diffusion Sampling'): + for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:", leave=False)): + # expand the latents if we are doing classifier free guidance + if do_classifier_free_guidance: + latent_model_input = torch.cat([latents] * (3 if dual_guidance else 2)) else: - noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - outputs = scheduler.step(noise_pred, t, latents, **extra_step_kwargs) - latents = outputs.prev_sample - - if callback is not None and i % callback_steps == 0: - step_idx = i // getattr(scheduler, "order", 1) - callback(step_idx, t, outputs) + latent_model_input = latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + timestep_tensor = torch.tensor([t], dtype=t_dtype, device=device) + timestep_tensor = timestep_tensor.expand(latent_model_input.shape[0]) + noise_pred = self.model(latent_model_input, timestep_tensor, cond, guidance_cond=guidance_cond) + + # no drop, drop clip, all 
drop + if do_classifier_free_guidance: + if dual_guidance: + noise_pred_clip, noise_pred_dino, noise_pred_uncond = noise_pred.chunk(3) + noise_pred = ( + noise_pred_uncond + + guidance_scale * (noise_pred_clip - noise_pred_dino) + + dual_guidance_scale * (noise_pred_dino - noise_pred_uncond) + ) + else: + noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + outputs = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs) + latents = outputs.prev_sample + + if callback is not None and i % callback_steps == 0: + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, outputs) return self._export( latents, @@ -482,7 +640,17 @@ class Hunyuan3DDiTPipeline: box_v, mc_level, num_chunks, octree_resolution, mc_algo, ) - def _export(self, latents, output_type, box_v, mc_level, num_chunks, octree_resolution, mc_algo): + def _export( + self, + latents, + output_type='trimesh', + box_v=1.01, + mc_level=0.0, + num_chunks=20000, + octree_resolution=256, + mc_algo='mc', + enable_pbar=True + ): if not output_type == "latent": latents = 1. / self.vae.scale_factor * latents latents = self.vae(latents) @@ -493,6 +661,7 @@ class Hunyuan3DDiTPipeline: num_chunks=num_chunks, octree_resolution=octree_resolution, mc_algo=mc_algo, + enable_pbar=enable_pbar, ) else: outputs = latents @@ -505,20 +674,20 @@ class Hunyuan3DDiTPipeline: class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): - @torch.no_grad() + @torch.inference_mode() def __call__( self, - image: Union[str, List[str], Image.Image] = None, + image: Union[str, List[str], Image.Image, dict, List[dict]] = None, num_inference_steps: int = 50, timesteps: List[int] = None, sigmas: List[float] = None, eta: float = 0.0, - guidance_scale: float = 7.5, + guidance_scale: float = 5.0, generator=None, box_v=1.01, octree_resolution=384, mc_level=0.0, - mc_algo='mc', + mc_algo=None, num_chunks=8000, output_type: Optional[str] = "trimesh", enable_pbar=True, @@ -527,6 +696,8 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): callback = kwargs.pop("callback", None) callback_steps = kwargs.pop("callback_steps", None) + self.set_surface_extractor(mc_algo) + device = self.device dtype = self.dtype do_classifier_free_guidance = guidance_scale >= 0 and not ( @@ -534,10 +705,11 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): self.model.guidance_embed is True ) - image, mask = self.prepare_image(image) + cond_inputs = self.prepare_image(image) + image = cond_inputs.pop('image') cond = self.encode_cond( image=image, - mask=mask, + additional_cond_inputs=cond_inputs, do_classifier_free_guidance=do_classifier_free_guidance, dual_guidance=False, ) @@ -546,9 +718,8 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): # 5. Prepare timesteps # NOTE: this is slightly different from common usage, we start from 0. 
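A quick aside on the NOTE above, which refers to the sigma schedule built on the line that follows: unlike typical diffusion samplers, the flow-matching sampler starts its sigmas at 0 and rises to 1. A minimal sketch, assuming only numpy and an illustrative `num_train_timesteps` of 1000 (the real value comes from the scheduler config):

```python
import numpy as np

num_inference_steps = 5
sigmas = np.linspace(0, 1, num_inference_steps)  # starts at 0, ends at 1
print(sigmas)          # [0.   0.25 0.5  0.75 1.  ]

# The scheduler turns sigmas into the timesteps handed to the model;
# 1000 is an assumed num_train_timesteps, for illustration only.
print(sigmas * 1000)   # [   0.  250.  500.  750. 1000.]
```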
sigmas = np.linspace(0, 1, num_inference_steps) if sigmas is None else sigmas - scheduler = instantiate_from_config(self.kwargs['scheduler_cfg']) timesteps, num_inference_steps = retrieve_timesteps( - scheduler, + self.scheduler, num_inference_steps, device, sigmas=sigmas, @@ -559,34 +730,36 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): if hasattr(self.model, 'guidance_embed') and \ self.model.guidance_embed is True: guidance = torch.tensor([guidance_scale] * batch_size, device=device, dtype=dtype) + # logger.info(f'Using guidance embed with scale {guidance_scale}') - for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:")): - # expand the latents if we are doing classifier free guidance - if do_classifier_free_guidance: - latent_model_input = torch.cat([latents] * 2) - else: - latent_model_input = latents + with synchronize_timer('Diffusion Sampling'): + for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:")): + # expand the latents if we are doing classifier free guidance + if do_classifier_free_guidance: + latent_model_input = torch.cat([latents] * 2) + else: + latent_model_input = latents - # NOTE: we assume model get timesteps ranged from 0 to 1 - timestep = t.expand(latent_model_input.shape[0]).to( - latents.dtype) / scheduler.config.num_train_timesteps - noise_pred = self.model(latent_model_input, timestep, cond, guidance=guidance) + # NOTE: we assume model get timesteps ranged from 0 to 1 + timestep = t.expand(latent_model_input.shape[0]).to( + latents.dtype) / self.scheduler.config.num_train_timesteps + noise_pred = self.model(latent_model_input, timestep, cond, guidance=guidance) - if do_classifier_free_guidance: - noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) + if do_classifier_free_guidance: + noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) - # compute the previous noisy sample x_t -> x_t-1 - outputs = scheduler.step(noise_pred, t, latents) - latents = outputs.prev_sample + # compute the previous noisy sample x_t -> x_t-1 + outputs = self.scheduler.step(noise_pred, t, latents) + latents = outputs.prev_sample - if callback is not None and i % callback_steps == 0: - step_idx = i // getattr(scheduler, "order", 1) - callback(step_idx, t, outputs) + if callback is not None and i % callback_steps == 0: + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, outputs) return self._export( latents, output_type, box_v, mc_level, num_chunks, octree_resolution, mc_algo, + enable_pbar=enable_pbar, ) - diff --git a/hy3dgen/shapegen/postprocessors.py b/hy3dgen/shapegen/postprocessors.py old mode 100644 new mode 100755 index 1843817b02a5355cf0a5e40fae2e476bee61a326..d258369e2b9116090ade3c955d57215d1481138e --- a/hy3dgen/shapegen/postprocessors.py +++ b/hy3dgen/shapegen/postprocessors.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. 
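With the flow-matching pipeline now complete, a hedged end-to-end sketch of how the rewritten loader and sampler fit together; the repo id, import path, and example image are illustrative, and the defaults follow the new signatures above:

```python
import torch
from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline  # import path assumed

# from_pretrained now resolves config/checkpoint via smart_load_model
# and defaults to the fp16 safetensors variant.
pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
    'tencent/Hunyuan3D-2',
    device='cuda',
    dtype=torch.float16,
)
mesh = pipeline(
    image='assets/example_images/004.png',  # single-view input, path illustrative
    num_inference_steps=50,
    guidance_scale=5.0,                     # the new default
)[0]
mesh.export('demo.glb')
```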
-# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -22,13 +12,17 @@ # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +import os import tempfile from typing import Union +import numpy as np import pymeshlab +import torch import trimesh -from .models.vae import Latent2MeshOutput +from .models.autoencoders import Latent2MeshOutput +from .utils import synchronize_timer def load_mesh(path): @@ -41,6 +35,9 @@ def load_mesh(path): def reduce_face(mesh: pymeshlab.MeshSet, max_facenum: int = 200000): + if max_facenum > mesh.current_mesh().face_number(): + return mesh + mesh.apply_filter( "meshing_decimation_quadric_edge_collapse", targetfacenum=max_facenum, @@ -63,7 +60,7 @@ def remove_floater(mesh: pymeshlab.MeshSet): def pymeshlab2trimesh(mesh: pymeshlab.MeshSet): - with tempfile.NamedTemporaryFile(suffix='.ply', delete=True) as temp_file: + with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: mesh.save_current_mesh(temp_file.name) mesh = trimesh.load(temp_file.name) # 检查加载的对象类型 @@ -77,7 +74,7 @@ def pymeshlab2trimesh(mesh: pymeshlab.MeshSet): def trimesh2pymeshlab(mesh: trimesh.Trimesh): - with tempfile.NamedTemporaryFile(suffix='.ply', delete=True) as temp_file: + with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: if isinstance(mesh, trimesh.scene.Scene): for idx, obj in enumerate(mesh.geometry.values()): if idx == 0: @@ -119,6 +116,7 @@ def import_mesh(mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutpu class FaceReducer: + @synchronize_timer('FaceReducer') def __call__( self, mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], @@ -131,6 +129,7 @@ class FaceReducer: class FloaterRemover: + @synchronize_timer('FloaterRemover') def __call__( self, mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], @@ -142,16 +141,62 @@ class FloaterRemover: class DegenerateFaceRemover: + @synchronize_timer('DegenerateFaceRemover') def __call__( self, mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]: ms = import_mesh(mesh) - with tempfile.NamedTemporaryFile(suffix='.ply', delete=True) as temp_file: + with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: ms.save_current_mesh(temp_file.name) ms = pymeshlab.MeshSet() ms.load_new_mesh(temp_file.name) mesh = export_mesh(mesh, ms) return mesh + + +def mesh_normalize(mesh): + """ + Normalize mesh vertices to sphere + """ + scale_factor = 1.2 + vtx_pos = np.asarray(mesh.vertices) + max_bb = (vtx_pos - 0).max(0)[0] + min_bb = (vtx_pos - 0).min(0)[0] + + center = (max_bb + min_bb) / 2 + + scale = torch.norm(torch.tensor(vtx_pos - center, dtype=torch.float32), dim=1).max() * 2.0 + + vtx_pos = (vtx_pos - center) * (scale_factor / float(scale)) + mesh.vertices = vtx_pos + + return mesh + + +class MeshSimplifier: + def __init__(self, executable: str = None): + if executable is None: + CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) + executable = os.path.join(CURRENT_DIR, 
"mesh_simplifier.bin") + self.executable = executable + + @synchronize_timer('MeshSimplifier') + def __call__( + self, + mesh: Union[trimesh.Trimesh], + ) -> Union[trimesh.Trimesh]: + with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_input: + with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_output: + mesh.export(temp_input.name) + os.system(f'{self.executable} {temp_input.name} {temp_output.name}') + ms = trimesh.load(temp_output.name, process=False) + if isinstance(ms, trimesh.Scene): + combined_mesh = trimesh.Trimesh() + for geom in ms.geometry.values(): + combined_mesh = trimesh.util.concatenate([combined_mesh, geom]) + ms = combined_mesh + ms = mesh_normalize(ms) + return ms diff --git a/hy3dgen/shapegen/preprocessors.py b/hy3dgen/shapegen/preprocessors.py old mode 100644 new mode 100755 index 2bdaff2d16cc0844d8d23c886d35c2f4e7286ff7..8a9cb9ea1591363fb77f4e02351a0f945e3bc1ab --- a/hy3dgen/shapegen/preprocessors.py +++ b/hy3dgen/shapegen/preprocessors.py @@ -1,12 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -87,7 +78,7 @@ class ImageProcessorV2: interpolation=cv2.INTER_AREA) bg = np.ones((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255 - # bg = np.zeros((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255 + mask = result[..., 3:].astype(np.float32) / 255 result = result[..., :3] * mask + bg * (1 - mask) @@ -96,15 +87,13 @@ class ImageProcessorV2: mask = mask.clip(0, 255).astype(np.uint8) return result, mask - def __call__(self, image, border_ratio=0.15, to_tensor=True, return_mask=False, **kwargs): - if self.border_ratio is not None: - border_ratio = self.border_ratio - print(f"Using border_ratio from init: {border_ratio}") + def load_image(self, image, border_ratio=0.15, to_tensor=True): if isinstance(image, str): image = cv2.imread(image, cv2.IMREAD_UNCHANGED) image, mask = self.recenter(image, border_ratio=border_ratio) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) elif isinstance(image, Image.Image): + image = image.convert("RGBA") image = np.asarray(image) image, mask = self.recenter(image, border_ratio=border_ratio) @@ -115,13 +104,64 @@ class ImageProcessorV2: if to_tensor: image = array_to_tensor(image) mask = array_to_tensor(mask) - if return_mask: - return image, mask - return image + return image, mask + + def __call__(self, image, border_ratio=0.15, to_tensor=True, **kwargs): + if self.border_ratio is not None: + border_ratio = self.border_ratio + image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor) + outputs = { + 'image': image, + 'mask': mask + } + return outputs + + +class MVImageProcessorV2(ImageProcessorV2): + """ + view order: front, front clockwise 90, back, front clockwise 270 + """ + return_view_idx = True + + def 
__init__(self, size=512, border_ratio=None): + super().__init__(size, border_ratio) + self.view2idx = { + 'front': 0, + 'left': 1, + 'back': 2, + 'right': 3 + } + + def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kwargs): + if self.border_ratio is not None: + border_ratio = self.border_ratio + + images = [] + masks = [] + view_idxs = [] + for idx, (view_tag, image) in enumerate(image_dict.items()): + view_idxs.append(self.view2idx[view_tag]) + image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor) + images.append(image) + masks.append(mask) + + zipped_lists = zip(view_idxs, images, masks) + sorted_zipped_lists = sorted(zipped_lists) + view_idxs, images, masks = zip(*sorted_zipped_lists) + + image = torch.cat(images, 0).unsqueeze(0) + mask = torch.cat(masks, 0).unsqueeze(0) + outputs = { + 'image': image, + 'mask': mask, + 'view_idxs': view_idxs + } + return outputs IMAGE_PROCESSORS = { "v2": ImageProcessorV2, + 'mv_v2': MVImageProcessorV2, } DEFAULT_IMAGEPROCESSOR = 'v2' diff --git a/hy3dgen/shapegen/schedulers.py b/hy3dgen/shapegen/schedulers.py old mode 100644 new mode 100755 index 0069f5cd49c5095930b588f01129a77f172171a7..6ef364360e13afae03c39e49f4fa6da963c970d4 --- a/hy3dgen/shapegen/schedulers.py +++ b/hy3dgen/shapegen/schedulers.py @@ -12,6 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the repsective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
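A minimal usage sketch for the new `MVImageProcessorV2` above. The file names are hypothetical, and views may be passed in any order, since the processor re-sorts them by its `view2idx` mapping (front=0, left=1, back=2, right=3) before concatenating:

```python
from PIL import Image
from hy3dgen.shapegen.preprocessors import MVImageProcessorV2

processor = MVImageProcessorV2(size=512)
outputs = processor({
    'back': Image.open('back.png'),    # hypothetical paths
    'front': Image.open('front.png'),
    'left': Image.open('left.png'),
})
# Views come back sorted by index regardless of dict order.
print(outputs['view_idxs'])  # (0, 1, 2)
# 'image' and 'mask' are batched tensors; their exact layout depends on
# array_to_tensor, which is defined elsewhere in this module.
print(type(outputs['image']))
```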
+ import math from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -305,3 +319,162 @@ class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin): def __len__(self): return self.config.num_train_timesteps + + +@dataclass +class ConsistencyFlowMatchEulerDiscreteSchedulerOutput(BaseOutput): + prev_sample: torch.FloatTensor + pred_original_sample: torch.FloatTensor + + +class ConsistencyFlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin): + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + pcm_timesteps: int = 50, + ): + sigmas = np.linspace(0, 1, num_train_timesteps) + step_ratio = num_train_timesteps // pcm_timesteps + + euler_timesteps = (np.arange(1, pcm_timesteps) * step_ratio).round().astype(np.int64) - 1 + euler_timesteps = np.asarray([0] + euler_timesteps.tolist()) + + self.euler_timesteps = euler_timesteps + self.sigmas = sigmas[self.euler_timesteps] + self.sigmas = torch.from_numpy((self.sigmas.copy())) + self.timesteps = self.sigmas * num_train_timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def _sigma_to_t(self, sigma): + return sigma * self.config.num_train_timesteps + + def set_timesteps( + self, + num_inference_steps: int = None, + device: Union[str, torch.device] = None, + sigmas: Optional[List[float]] = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. 
+ """ + self.num_inference_steps = num_inference_steps if num_inference_steps is not None else len(sigmas) + inference_indices = np.linspace( + 0, self.config.pcm_timesteps, num=self.num_inference_steps, endpoint=False + ) + inference_indices = np.floor(inference_indices).astype(np.int64) + inference_indices = torch.from_numpy(inference_indices).long() + + self.sigmas_ = self.sigmas[inference_indices] + timesteps = self.sigmas_ * self.config.num_train_timesteps + self.timesteps = timesteps.to(device=device) + self.sigmas_ = torch.cat( + [self.sigmas_, torch.ones(1, device=self.sigmas_.device)] + ) + + self._step_index = None + self._begin_index = None + + def index_for_timestep(self, timestep, schedule_timesteps=None): + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + def _init_step_index(self, timestep): + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.FloatTensor, + timestep: Union[float, torch.FloatTensor], + sample: torch.FloatTensor, + generator: Optional[torch.Generator] = None, + return_dict: bool = True, + ) -> Union[ConsistencyFlowMatchEulerDiscreteSchedulerOutput, Tuple]: + if ( + isinstance(timestep, int) + or isinstance(timestep, torch.IntTensor) + or isinstance(timestep, torch.LongTensor) + ): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if self.step_index is None: + self._init_step_index(timestep) + + sample = sample.to(torch.float32) + + sigma = self.sigmas_[self.step_index] + sigma_next = self.sigmas_[self.step_index + 1] + + prev_sample = sample + (sigma_next - sigma) * model_output + prev_sample = prev_sample.to(model_output.dtype) + + pred_original_sample = sample + (1.0 - sigma) * model_output + pred_original_sample = pred_original_sample.to(model_output.dtype) + + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return ConsistencyFlowMatchEulerDiscreteSchedulerOutput(prev_sample=prev_sample, + pred_original_sample=pred_original_sample) + + def __len__(self): + return self.config.num_train_timesteps diff --git a/hy3dgen/shapegen/utils.py b/hy3dgen/shapegen/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6ac8f5d42c47df0dc765e2cd28b352bed0a2dd8e --- /dev/null +++ b/hy3dgen/shapegen/utils.py @@ -0,0 +1,126 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the repsective licenses of these third-party components. 
+# Users must comply with all terms and conditions of original licenses of these third-party
+# components and must ensure that the usage of the third party components adheres to
+# all relevant laws and regulations.
+
+# For avoidance of doubts, Hunyuan 3D means the large language models and
+# their software and algorithms, including trained model weights, parameters (including
+# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
+# fine-tuning enabling code and other elements of the foregoing made publicly available
+# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
+
+import logging
+import os
+from functools import wraps
+
+import torch
+
+
+def get_logger(name):
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    return logger
+
+
+logger = get_logger('hy3dgen.shapegen')
+
+
+class synchronize_timer:
+    """ Synchronized timer to count the inference time of `nn.Module.forward`.
+
+    Supports both context manager and decorator usage.
+
+    Example as context manager:
+    ```python
+    with synchronize_timer('name') as t:
+        run()
+    ```
+
+    Example as decorator:
+    ```python
+    @synchronize_timer('Export to trimesh')
+    def export_to_trimesh(mesh_output):
+        pass
+    ```
+    """
+
+    def __init__(self, name=None):
+        self.name = name
+
+    def __enter__(self):
+        """Context manager entry: start timing."""
+        if os.environ.get('HY3DGEN_DEBUG', '0') == '1':
+            self.start = torch.cuda.Event(enable_timing=True)
+            self.end = torch.cuda.Event(enable_timing=True)
+            self.start.record()
+        return lambda: self.time
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        """Context manager exit: stop timing and log results."""
+        if os.environ.get('HY3DGEN_DEBUG', '0') == '1':
+            self.end.record()
+            torch.cuda.synchronize()
+            self.time = self.start.elapsed_time(self.end)
+            if self.name is not None:
+                logger.info(f'{self.name} takes {self.time} ms')
+
+    def __call__(self, func):
+        """Decorator: wrap the function to time its execution."""
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            with self:
+                result = func(*args, **kwargs)
+            return result
+
+        return wrapper
+
+
+def smart_load_model(
+    model_path,
+    subfolder,
+    use_safetensors,
+    variant,
+):
+    original_model_path = model_path
+    # try local path
+    base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
+    model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder))
+    logger.info(f'Trying to load model from local path: {model_path}')
+    if not os.path.exists(model_path):
+        logger.info('Model path does not exist, trying to download from Hugging Face')
+        try:
+            from huggingface_hub import snapshot_download
+            # only download the specified subfolder
+            path = snapshot_download(
+                repo_id=original_model_path,
+                allow_patterns=[f"{subfolder}/*"],  # key change: pattern-match the subfolder
+            )
+            model_path = os.path.join(path, subfolder)  # keep the path-join logic unchanged
+        except ImportError:
+            logger.warning(
+                "You need to install HuggingFace Hub to load models from the hub."
+ ) + raise RuntimeError(f"Model path {model_path} not found") + except Exception as e: + raise e + + if not os.path.exists(model_path): + raise FileNotFoundError(f"Model path {original_model_path} not found") + + extension = 'ckpt' if not use_safetensors else 'safetensors' + variant = '' if variant is None else f'.{variant}' + ckpt_name = f'model{variant}.{extension}' + config_path = os.path.join(model_path, 'config.yaml') + ckpt_path = os.path.join(model_path, ckpt_name) + return config_path, ckpt_path diff --git a/hy3dgen/texgen/__init__.py b/hy3dgen/texgen/__init__.py index 1f890f024d507021eca8087d40dc472de36152bd..7054c5797257839532eb03b05a474ddf020b1695 100644 --- a/hy3dgen/texgen/__init__.py +++ b/hy3dgen/texgen/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_glb.py b/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_glb.py deleted file mode 100644 index c5d7dc8c6127e62848dda8e79fdc281c5a7b42cb..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_glb.py +++ /dev/null @@ -1,248 +0,0 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - -# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT -# except for the third-party components listed below. -# Hunyuan 3D does not impose any additional limitations beyond what is outlined -# in the repsective licenses of these third-party components. -# Users must comply with all terms and conditions of original licenses of these third-party -# components and must ensure that the usage of the third party components adheres to -# all relevant laws and regulations. - -# For avoidance of doubts, Hunyuan 3D means the large language models and -# their software and algorithms, including trained model weights, parameters (including -# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, -# fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
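One note on `smart_load_model` above before the deleted texture-generation files: the checkpoint name is assembled from `use_safetensors` and `variant`. A sketch of the pure path arithmetic, with an illustrative cache location:

```python
import os

model_path = os.path.expanduser(
    '~/.cache/hy3dgen/tencent/Hunyuan3D-2/hunyuan3d-dit-v2-0')  # illustrative
use_safetensors, variant = True, 'fp16'

# Same naming logic as the tail of smart_load_model above.
extension = 'ckpt' if not use_safetensors else 'safetensors'
variant = '' if variant is None else f'.{variant}'
ckpt_name = f'model{variant}.{extension}'

print(os.path.join(model_path, 'config.yaml'))
print(os.path.join(model_path, ckpt_name))  # .../model.fp16.safetensors
```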
- -import base64 -import io -import os - -import numpy as np -from PIL import Image as PILImage -from pygltflib import GLTF2 -from scipy.spatial.transform import Rotation as R - - -# Function to extract buffer data -def get_buffer_data(gltf, buffer_view): - buffer = gltf.buffers[buffer_view.buffer] - buffer_data = gltf.get_data_from_buffer_uri(buffer.uri) - byte_offset = buffer_view.byteOffset if buffer_view.byteOffset else 0 - byte_length = buffer_view.byteLength - return buffer_data[byte_offset:byte_offset + byte_length] - - -# Function to extract attribute data -def get_attribute_data(gltf, accessor_index): - accessor = gltf.accessors[accessor_index] - buffer_view = gltf.bufferViews[accessor.bufferView] - buffer_data = get_buffer_data(gltf, buffer_view) - - comptype = {5120: np.int8, 5121: np.uint8, 5122: np.int16, 5123: np.uint16, 5125: np.uint32, 5126: np.float32} - dtype = comptype[accessor.componentType] - - t2n = {'SCALAR': 1, 'VEC2': 2, 'VEC3': 3, 'VEC4': 4, 'MAT2': 4, 'MAT3': 9, 'MAT4': 16} - num_components = t2n[accessor.type] - - # Calculate the correct slice of data - byte_offset = accessor.byteOffset if accessor.byteOffset else 0 - byte_stride = buffer_view.byteStride if buffer_view.byteStride else num_components * np.dtype(dtype).itemsize - count = accessor.count - - # Extract the attribute data - attribute_data = np.zeros((count, num_components), dtype=dtype) - for i in range(count): - start = byte_offset + i * byte_stride - end = start + num_components * np.dtype(dtype).itemsize - attribute_data[i] = np.frombuffer(buffer_data[start:end], dtype=dtype) - - return attribute_data - - -# Function to extract image data -def get_image_data(gltf, image, folder): - if image.uri: - if image.uri.startswith('data:'): - # Data URI - header, encoded = image.uri.split(',', 1) - data = base64.b64decode(encoded) - else: - # External file - fn = image.uri - if not os.path.isabs(fn): - fn = folder + '/' + fn - with open(fn, 'rb') as f: - data = f.read() - else: - buffer_view = gltf.bufferViews[image.bufferView] - data = get_buffer_data(gltf, buffer_view) - return data - - -# Function to convert triangle strip to triangles -def convert_triangle_strip_to_triangles(indices): - triangles = [] - for i in range(len(indices) - 2): - if i % 2 == 0: - triangles.append([indices[i], indices[i + 1], indices[i + 2]]) - else: - triangles.append([indices[i], indices[i + 2], indices[i + 1]]) - return np.array(triangles).reshape(-1, 3) - - -# Function to convert triangle fan to triangles -def convert_triangle_fan_to_triangles(indices): - triangles = [] - for i in range(1, len(indices) - 1): - triangles.append([indices[0], indices[i], indices[i + 1]]) - return np.array(triangles).reshape(-1, 3) - - -# Function to get the transformation matrix from a node -def get_node_transform(node): - if node.matrix: - return np.array(node.matrix).reshape(4, 4).T - else: - T = np.eye(4) - if node.translation: - T[:3, 3] = node.translation - if node.rotation: - R_mat = R.from_quat(node.rotation).as_matrix() - T[:3, :3] = R_mat - if node.scale: - S = np.diag(node.scale + [1]) - T = T @ S - return T - - -def get_world_transform(gltf, node_index, parents, world_transforms): - if parents[node_index] == -2: - return world_transforms[node_index] - - node = gltf.nodes[node_index] - if parents[node_index] == -1: - world_transforms[node_index] = get_node_transform(node) - parents[node_index] = -2 - return world_transforms[node_index] - - parent_index = parents[node_index] - parent_transform = get_world_transform(gltf, parent_index, 
parents, world_transforms) - world_transforms[node_index] = parent_transform @ get_node_transform(node) - parents[node_index] = -2 - return world_transforms[node_index] - - -def LoadGlb(path): - # Load the GLB file using pygltflib - gltf = GLTF2().load(path) - - primitives = [] - images = {} - # Iterate through the meshes in the GLB file - - world_transforms = [np.identity(4) for i in range(len(gltf.nodes))] - parents = [-1 for i in range(len(gltf.nodes))] - for node_index, node in enumerate(gltf.nodes): - for idx in node.children: - parents[idx] = node_index - # for i in range(len(gltf.nodes)): - # get_world_transform(gltf, i, parents, world_transform) - - for node_index, node in enumerate(gltf.nodes): - if node.mesh is not None: - world_transform = get_world_transform(gltf, node_index, parents, world_transforms) - # Iterate through the primitives in the mesh - mesh = gltf.meshes[node.mesh] - for primitive in mesh.primitives: - # Access the attributes of the primitive - attributes = primitive.attributes.__dict__ - mode = primitive.mode if primitive.mode is not None else 4 # Default to TRIANGLES - result = {} - if primitive.indices is not None: - indices = get_attribute_data(gltf, primitive.indices) - if mode == 4: # TRIANGLES - face_indices = indices.reshape(-1, 3) - elif mode == 5: # TRIANGLE_STRIP - face_indices = convert_triangle_strip_to_triangles(indices) - elif mode == 6: # TRIANGLE_FAN - face_indices = convert_triangle_fan_to_triangles(indices) - else: - continue - result['F'] = face_indices - - # Extract vertex positions - if 'POSITION' in attributes and attributes['POSITION'] is not None: - positions = get_attribute_data(gltf, attributes['POSITION']) - # Apply the world transformation to the positions - positions_homogeneous = np.hstack([positions, np.ones((positions.shape[0], 1))]) - transformed_positions = (world_transform @ positions_homogeneous.T).T[:, :3] - result['V'] = transformed_positions - - # Extract vertex colors - if 'COLOR_0' in attributes and attributes['COLOR_0'] is not None: - colors = get_attribute_data(gltf, attributes['COLOR_0']) - if colors.shape[-1] > 3: - colors = colors[..., :3] - result['VC'] = colors - - # Extract UVs - if 'TEXCOORD_0' in attributes and not attributes['TEXCOORD_0'] is None: - uvs = get_attribute_data(gltf, attributes['TEXCOORD_0']) - result['UV'] = uvs - - if primitive.material is not None: - material = gltf.materials[primitive.material] - if material.pbrMetallicRoughness is not None and material.pbrMetallicRoughness.baseColorTexture is not None: - texture_index = material.pbrMetallicRoughness.baseColorTexture.index - texture = gltf.textures[texture_index] - image_index = texture.source - if not image_index in images: - image = gltf.images[image_index] - image_data = get_image_data(gltf, image, os.path.dirname(path)) - pil_image = PILImage.open(io.BytesIO(image_data)) - if pil_image.mode != 'RGB': - pil_image = pil_image.convert('RGB') - images[image_index] = pil_image - result['TEX'] = image_index - elif material.emissiveTexture is not None: - texture_index = material.emissiveTexture.index - texture = gltf.textures[texture_index] - image_index = texture.source - if not image_index in images: - image = gltf.images[image_index] - image_data = get_image_data(gltf, image, os.path.dirname(path)) - pil_image = PILImage.open(io.BytesIO(image_data)) - if pil_image.mode != 'RGB': - pil_image = pil_image.convert('RGB') - images[image_index] = pil_image - result['TEX'] = image_index - else: - if material.pbrMetallicRoughness is not None: - 
base_color = material.pbrMetallicRoughness.baseColorFactor - else: - base_color = np.array([0.8, 0.8, 0.8], dtype=np.float32) - result['MC'] = base_color - - primitives.append(result) - - return primitives, images - - -def RotatePrimitives(primitives, transform): - for i in range(len(primitives)): - if 'V' in primitives[i]: - primitives[i]['V'] = primitives[i]['V'] @ transform.T - - -if __name__ == '__main__': - path = 'data/test.glb' - LoadGlb(path) diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_obj.py b/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_obj.py deleted file mode 100644 index a72c478d8efcb9a3d71a67ce5f167559ef76b922..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_obj.py +++ /dev/null @@ -1,76 +0,0 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - -# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT -# except for the third-party components listed below. -# Hunyuan 3D does not impose any additional limitations beyond what is outlined -# in the repsective licenses of these third-party components. -# Users must comply with all terms and conditions of original licenses of these third-party -# components and must ensure that the usage of the third party components adheres to -# all relevant laws and regulations. - -# For avoidance of doubts, Hunyuan 3D means the large language models and -# their software and algorithms, including trained model weights, parameters (including -# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, -# fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
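For reference while `io_glb.py` is removed: its strip-to-triangles helper (shown earlier in this deletion) alternates the winding of odd-indexed faces so all triangles keep a consistent orientation. Extracted as a standalone sketch:

```python
import numpy as np

def strip_to_triangles(indices):
    # Same logic as the deleted convert_triangle_strip_to_triangles.
    triangles = []
    for i in range(len(indices) - 2):
        if i % 2 == 0:
            triangles.append([indices[i], indices[i + 1], indices[i + 2]])
        else:
            triangles.append([indices[i], indices[i + 2], indices[i + 1]])
    return np.array(triangles).reshape(-1, 3)

print(strip_to_triangles([0, 1, 2, 3, 4]))
# [[0 1 2]
#  [1 3 2]
#  [2 3 4]]
```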
- -import cv2 -import numpy as np - - -def LoadObj(fn): - lines = [l.strip() for l in open(fn)] - vertices = [] - faces = [] - for l in lines: - words = [w for w in l.split(' ') if w != ''] - if len(words) == 0: - continue - if words[0] == 'v': - v = [float(words[i]) for i in range(1, 4)] - vertices.append(v) - elif words[0] == 'f': - f = [int(words[i]) - 1 for i in range(1, 4)] - faces.append(f) - - return np.array(vertices).astype('float32'), np.array(faces).astype('int32') - - -def LoadObjWithTexture(fn, tex_fn): - lines = [l.strip() for l in open(fn)] - vertices = [] - vertex_textures = [] - faces = [] - face_textures = [] - for l in lines: - words = [w for w in l.split(' ') if w != ''] - if len(words) == 0: - continue - if words[0] == 'v': - v = [float(words[i]) for i in range(1, len(words))] - vertices.append(v) - elif words[0] == 'vt': - v = [float(words[i]) for i in range(1, len(words))] - vertex_textures.append(v) - elif words[0] == 'f': - f = [] - ft = [] - for i in range(1, len(words)): - t = words[i].split('/') - f.append(int(t[0]) - 1) - ft.append(int(t[1]) - 1) - for i in range(2, len(f)): - faces.append([f[0], f[i - 1], f[i]]) - face_textures.append([ft[0], ft[i - 1], ft[i]]) - - tex_image = cv2.cvtColor(cv2.imread(tex_fn), cv2.COLOR_BGR2RGB) - return np.array(vertices).astype('float32'), np.array(vertex_textures).astype('float32'), np.array(faces).astype( - 'int32'), np.array(face_textures).astype('int32'), tex_image diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/render.py b/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/render.py deleted file mode 100644 index 743d4aac4da9e1e18374ce712ac24d19e6788870..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/render.py +++ /dev/null @@ -1,41 +0,0 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - -# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT -# except for the third-party components listed below. -# Hunyuan 3D does not impose any additional limitations beyond what is outlined -# in the repsective licenses of these third-party components. -# Users must comply with all terms and conditions of original licenses of these third-party -# components and must ensure that the usage of the third party components adheres to -# all relevant laws and regulations. - -# For avoidance of doubts, Hunyuan 3D means the large language models and -# their software and algorithms, including trained model weights, parameters (including -# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, -# fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
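Similarly for the removed `io_obj.py`: `LoadObjWithTexture` fan-triangulates polygonal faces around the first vertex. A two-line illustration of that triangulation on a quad (indices already shifted to 0-based by the `-1` in the parser):

```python
f = [0, 1, 2, 3]  # a quad face, e.g. "f 1/1 2/2 3/3 4/4" after the -1 shift
faces = [[f[0], f[i - 1], f[i]] for i in range(2, len(f))]
print(faces)  # [[0, 1, 2], [0, 2, 3]]
```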
- -import custom_rasterizer_kernel -import torch - - -def rasterize(pos, tri, resolution, clamp_depth=torch.zeros(0), use_depth_prior=0): - assert (pos.device == tri.device) - findices, barycentric = custom_rasterizer_kernel.rasterize_image(pos[0], tri, clamp_depth, resolution[1], - resolution[0], 1e-6, use_depth_prior) - return findices, barycentric - - -def interpolate(col, findices, barycentric, tri): - f = findices - 1 + (findices == 0) - vcol = col[0, tri.long()[f.long()]] - result = barycentric.view(*barycentric.shape, 1) * vcol - result = torch.sum(result, axis=-2) - return result.view(1, *result.shape) diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer_kernel.cpython-311-x86_64-linux-gnu.so b/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer_kernel.cpython-311-x86_64-linux-gnu.so deleted file mode 100644 index fae9d0b229821dfe744e1b7b70250848eaa60797..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer_kernel.cpython-311-x86_64-linux-gnu.so and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_deps b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_deps deleted file mode 100644 index 0227139e664b127ab09b323a310ef5b67e038309..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_deps and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_log b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_log deleted file mode 100644 index 961073a8816d177520ae1b8a655f413b83678c12..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_log +++ /dev/null @@ -1,4 +0,0 @@ -# ninja log v5 -5 12944 1737469910283155280 /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o 6b1f5e5e4b199209 -4 13455 1737469910695486266 /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o af3659b839e5e6e4 -6 34765 1737469932096669642 /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o f5d05646c31ca370 diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/build.ninja b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/build.ninja deleted file mode 100644 index fb26eea1e35d1f43eba8e2b4be3527f6072dce16..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/build.ninja +++ /dev/null @@ -1,34 +0,0 @@ -ninja_required_version = 1.3 -cxx = c++ -nvcc = /usr/local/cuda/bin/nvcc - -cflags = -pthread -B /opt/conda/envs/hunyuan3d-2-open/compiler_compat -DNDEBUG -fwrapv -O2 -Wall -fPIC -O2 -isystem /opt/conda/envs/hunyuan3d-2-open/include -fPIC -O2 -isystem /opt/conda/envs/hunyuan3d-2-open/include -fPIC -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/torch/csrc/api/include 
-I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/TH -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/envs/hunyuan3d-2-open/include/python3.11 -c -post_cflags = -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=custom_rasterizer_kernel -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++17 -cuda_cflags = -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/TH -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/envs/hunyuan3d-2-open/include/python3.11 -c -cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=custom_rasterizer_kernel -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_90,code=sm_90 -std=c++17 -cuda_dlink_post_cflags = -ldflags = - -rule compile - command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags - depfile = $out.d - deps = gcc - -rule cuda_compile - depfile = $out.d - deps = gcc - command = $nvcc --generate-dependencies-with-compile --dependency-output $out.d $cuda_cflags -c $in -o $out $cuda_post_cflags - - - - - -build /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o: compile /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp -build /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o: compile /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp -build /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o: cuda_compile /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu - - - - - - diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o deleted file mode 100644 index 372a5daca94d37bb722a058e89a13e2153bc6341..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o 
b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o deleted file mode 100644 index ec8fed027a0fe9a3339c8aeb51bfbeaf3b47f570..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o deleted file mode 100644 index 963b6b8213309be5897c47fd976db2df8edafb3a..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/PKG-INFO b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/PKG-INFO deleted file mode 100644 index 4fd8d7197973d690207193769b1355f2aab0f91d..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/PKG-INFO +++ /dev/null @@ -1,3 +0,0 @@ -Metadata-Version: 2.1 -Name: custom_rasterizer -Version: 0.1 diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/SOURCES.txt b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/SOURCES.txt deleted file mode 100644 index ca40e02e41f7ba071df02ce368bfefec2847a6ad..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/SOURCES.txt +++ /dev/null @@ -1,12 +0,0 @@ -setup.py -./custom_rasterizer/__init__.py -./custom_rasterizer/io_glb.py -./custom_rasterizer/io_obj.py -./custom_rasterizer/render.py -custom_rasterizer.egg-info/PKG-INFO -custom_rasterizer.egg-info/SOURCES.txt -custom_rasterizer.egg-info/dependency_links.txt -custom_rasterizer.egg-info/top_level.txt -lib/custom_rasterizer_kernel/grid_neighbor.cpp -lib/custom_rasterizer_kernel/rasterizer.cpp -lib/custom_rasterizer_kernel/rasterizer_gpu.cu \ No newline at end of file diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/dependency_links.txt b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/top_level.txt b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/top_level.txt deleted file mode 100644 index 4880ad0e94189fc44fe2052edd5eaa0fcdbdb7e8..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/top_level.txt +++ /dev/null @@ -1,2 +0,0 @@ -custom_rasterizer -custom_rasterizer_kernel diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py index df40dcc8d4819eb903263ff1faf70ce902eb7e07..f471e1ac289d962613553fed2ba6e177e5af3ab9 100644 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py +++ b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in 
this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py index c5d7dc8c6127e62848dda8e79fdc281c5a7b42cb..606dd774b558857fb8a6773509fecd1f7da6e9f3 100644 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py +++ b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py index a72c478d8efcb9a3d71a67ce5f167559ef76b922..e40d50050456a3a3d5cb3fbed516c4d4bd0bdb8f 100644 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py +++ b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. 
# Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py index 743d4aac4da9e1e18374ce712ac24d19e6788870..2d4d3f7ee6ba13ff7df1000eb3dd3e978d2d6fc4 100644 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py +++ b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp b/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp deleted file mode 100644 index f02bcba5afd45a524143d06c972acb87c393fe97..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp +++ /dev/null @@ -1,574 +0,0 @@ -#include "rasterizer.h" -#include <unordered_set> - -inline int pos2key(float* p, int resolution) { - int x = (p[0] * 0.5 + 0.5) * resolution; - int y = (p[1] * 0.5 + 0.5) * resolution; - int z = (p[2] * 0.5 + 0.5) * resolution; - return (x * resolution + y) * resolution + z; -} - -inline void key2pos(int key, int resolution, float* p) { - int x = key / resolution / resolution; - int y = key / resolution % resolution; - int z = key % resolution; - p[0] = ((x + 0.5) / resolution - 0.5) * 2; - p[1] = ((y + 0.5) / resolution - 0.5) * 2; - p[2] = ((z + 0.5) / resolution - 0.5) * 2; -} - -inline void key2cornerpos(int key, int resolution, float* p) { - int x = key / resolution / resolution; - int y = key / resolution % resolution; - int z = key % resolution; - p[0] = ((x + 0.75) / resolution - 0.5) * 2; - p[1] = ((y + 0.25) / resolution - 0.5) * 2; - p[2] = ((z + 0.75) / resolution - 0.5) * 2; -} - -inline float* pos_ptr(int l, int i, int j, torch::Tensor t) { - float* pdata = t.data_ptr<float>(); - int height = t.size(1); - int width = t.size(2); - return &pdata[((l * height + i) * width + j) * 4]; -} - -struct Grid -{ - std::vector<int> seq2oddcorner; - std::vector<int> seq2evencorner; - std::vector<int> seq2grid; - std::vector<int> seq2normal; - std::vector<int> seq2neighbor; - std::unordered_map<int, int> grid2seq; - std::vector<int> downsample_seq; - int num_origin_seq; - int resolution; - int stride; -}; - -inline void pos_from_seq(Grid& grid, int seq, float* p) { - auto k = grid.seq2grid[seq]; - key2pos(k, grid.resolution, p); -} - -inline int fetch_seq(Grid& grid, int l, int i, int j, torch::Tensor pdata) { - float* p = pos_ptr(l, i, j, pdata); - if (p[3] == 0) - return -1; - auto key = pos2key(p, grid.resolution); - int seq = grid.grid2seq[key]; - return seq; -} - -inline int fetch_last_seq(Grid& grid, int i, int j, torch::Tensor pdata) { - int num_layers = pdata.size(0); - int l = 0; - int idx =
fetch_seq(grid, l, i, j, pdata); - while (l < num_layers - 1) { - l += 1; - int new_idx = fetch_seq(grid, l, i, j, pdata); - if (new_idx == -1) - break; - idx = new_idx; - } - return idx; -} - -inline int fetch_nearest_seq(Grid& grid, int i, int j, int dim, float d, torch::Tensor pdata) { - float p[3]; - float max_dist = 1e10; - int best_idx = -1; - int num_layers = pdata.size(0); - for (int l = 0; l < num_layers; ++l) { - int idx = fetch_seq(grid, l, i, j, pdata); - if (idx == -1) - break; - pos_from_seq(grid, idx, p); - float dist = std::abs(d - p[(dim + 2) % 3]); - if (dist < max_dist) { - max_dist = dist; - best_idx = idx; - } - } - return best_idx; -} - -inline int fetch_nearest_seq_layer(Grid& grid, int i, int j, int dim, float d, torch::Tensor pdata) { - float p[3]; - float max_dist = 1e10; - int best_layer = -1; - int num_layers = pdata.size(0); - for (int l = 0; l < num_layers; ++l) { - int idx = fetch_seq(grid, l, i, j, pdata); - if (idx == -1) - break; - pos_from_seq(grid, idx, p); - float dist = std::abs(d - p[(dim + 2) % 3]); - if (dist < max_dist) { - max_dist = dist; - best_layer = l; - } - } - return best_layer; -} - -void FetchNeighbor(Grid& grid, int seq, float* pos, int dim, int boundary_info, std::vector<torch::Tensor>& view_layer_positions, - int* output_indices) -{ - auto t = view_layer_positions[dim]; - int height = t.size(1); - int width = t.size(2); - int top = 0; - int ci = 0; - int cj = 0; - if (dim == 0) { - ci = (pos[1]/2+0.5)*height; - cj = (pos[0]/2+0.5)*width; - } - else if (dim == 1) { - ci = (pos[1]/2+0.5)*height; - cj = (pos[2]/2+0.5)*width; - } - else { - ci = (-pos[2]/2+0.5)*height; - cj = (pos[0]/2+0.5)*width; - } - int stride = grid.stride; - for (int ni = ci + stride; ni >= ci - stride; ni -= stride) { - for (int nj = cj - stride; nj <= cj + stride; nj += stride) { - int idx = -1; - if (ni == ci && nj == cj) - idx = seq; - else if (!(ni < 0 || ni >= height || nj < 0 || nj >= width)) { - if (boundary_info == -1) - idx = fetch_seq(grid, 0, ni, nj, t); - else if (boundary_info == 1) - idx = fetch_last_seq(grid, ni, nj, t); - else - idx = fetch_nearest_seq(grid, ni, nj, dim, pos[(dim + 2) % 3], t); - } - output_indices[top] = idx; - top += 1; - } - } -} - -void DownsampleGrid(Grid& src, Grid& tar) -{ - src.downsample_seq.resize(src.seq2grid.size(), -1); - tar.resolution = src.resolution / 2; - tar.stride = src.stride * 2; - float pos[3]; - std::vector<int> seq2normal_count; - for (int i = 0; i < src.seq2grid.size(); ++i) { - key2pos(src.seq2grid[i], src.resolution, pos); - int k = pos2key(pos, tar.resolution); - int s = seq2normal_count.size(); - if (!tar.grid2seq.count(k)) { - tar.grid2seq[k] = tar.seq2grid.size(); - tar.seq2grid.emplace_back(k); - seq2normal_count.emplace_back(0); - seq2normal_count.emplace_back(0); - seq2normal_count.emplace_back(0); - //tar.seq2normal.emplace_back(src.seq2normal[i]); - } else { - s = tar.grid2seq[k] * 3; - } - seq2normal_count[s + src.seq2normal[i]] += 1; - src.downsample_seq[i] = tar.grid2seq[k]; - } - tar.seq2normal.resize(seq2normal_count.size() / 3); - for (int i = 0; i < seq2normal_count.size(); i += 3) { - int t = 0; - for (int j = 1; j < 3; ++j) { - if (seq2normal_count[i + j] > seq2normal_count[i + t]) - t = j; - } - tar.seq2normal[i / 3] = t; - } -} - -void NeighborGrid(Grid& grid, std::vector<torch::Tensor> view_layer_positions, int v) -{ - grid.seq2evencorner.resize(grid.seq2grid.size(), 0); - grid.seq2oddcorner.resize(grid.seq2grid.size(), 0); - std::unordered_set<int> visited_seq; - for (int vd = 0; vd < 3; ++vd) { - auto t =
view_layer_positions[vd]; - auto t0 = view_layer_positions[v]; - int height = t.size(1); - int width = t.size(2); - int num_layers = t.size(0); - int num_view_layers = t0.size(0); - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; ++j) { - for (int l = 0; l < num_layers; ++l) { - int seq = fetch_seq(grid, l, i, j, t); - if (seq == -1) - break; - int dim = grid.seq2normal[seq]; - if (dim != v) - continue; - - float pos[3]; - pos_from_seq(grid, seq, pos); - - int ci = 0; - int cj = 0; - if (dim == 0) { - ci = (pos[1]/2+0.5)*height; - cj = (pos[0]/2+0.5)*width; - } - else if (dim == 1) { - ci = (pos[1]/2+0.5)*height; - cj = (pos[2]/2+0.5)*width; - } - else { - ci = (-pos[2]/2+0.5)*height; - cj = (pos[0]/2+0.5)*width; - } - - if ((ci % (grid.stride * 2) < grid.stride) && (cj % (grid.stride * 2) >= grid.stride)) - grid.seq2evencorner[seq] = 1; - - if ((ci % (grid.stride * 2) >= grid.stride) && (cj % (grid.stride * 2) < grid.stride)) - grid.seq2oddcorner[seq] = 1; - - bool is_boundary = false; - if (vd == v) { - if (l == 0 || l == num_layers - 1) - is_boundary = true; - else { - int seq_new = fetch_seq(grid, l + 1, i, j, t); - if (seq_new == -1) - is_boundary = true; - } - } - int boundary_info = 0; - if (is_boundary && (l == 0)) - boundary_info = -1; - else if (is_boundary) - boundary_info = 1; - if (visited_seq.count(seq)) - continue; - visited_seq.insert(seq); - - FetchNeighbor(grid, seq, pos, dim, boundary_info, view_layer_positions, &grid.seq2neighbor[seq * 9]); - } - } - } - } -} - -void PadGrid(Grid& src, Grid& tar, std::vector& view_layer_positions) { - auto& downsample_seq = src.downsample_seq; - auto& seq2evencorner = src.seq2evencorner; - auto& seq2oddcorner = src.seq2oddcorner; - int indices[9]; - std::vector mapped_even_corners(tar.seq2grid.size(), 0); - std::vector mapped_odd_corners(tar.seq2grid.size(), 0); - for (int i = 0; i < downsample_seq.size(); ++i) { - if (seq2evencorner[i] > 0) { - mapped_even_corners[downsample_seq[i]] = 1; - } - if (seq2oddcorner[i] > 0) { - mapped_odd_corners[downsample_seq[i]] = 1; - } - } - auto& tar_seq2normal = tar.seq2normal; - auto& tar_seq2grid = tar.seq2grid; - for (int i = 0; i < tar_seq2grid.size(); ++i) { - if (mapped_even_corners[i] == 1 && mapped_odd_corners[i] == 1) - continue; - auto k = tar_seq2grid[i]; - float p[3]; - key2cornerpos(k, tar.resolution, p); - - int src_key = pos2key(p, src.resolution); - if (!src.grid2seq.count(src_key)) { - int seq = src.seq2grid.size(); - src.grid2seq[src_key] = seq; - src.seq2evencorner.emplace_back((mapped_even_corners[i] == 0)); - src.seq2oddcorner.emplace_back((mapped_odd_corners[i] == 0)); - src.seq2grid.emplace_back(src_key); - src.seq2normal.emplace_back(tar_seq2normal[i]); - FetchNeighbor(src, seq, p, tar_seq2normal[i], 0, view_layer_positions, indices); - for (int j = 0; j < 9; ++j) { - src.seq2neighbor.emplace_back(indices[j]); - } - src.downsample_seq.emplace_back(i); - } else { - int seq = src.grid2seq[src_key]; - if (mapped_even_corners[i] == 0) - src.seq2evencorner[seq] = 1; - if (mapped_odd_corners[i] == 0) - src.seq2oddcorner[seq] = 1; - } - } -} - -std::vector> build_hierarchy(std::vector view_layer_positions, - std::vector view_layer_normals, int num_level, int resolution) -{ - if (view_layer_positions.size() != 3 || num_level < 1) { - printf("Alert! We require 3 layers and at least 1 level! 
(%d %d)\n", view_layer_positions.size(), num_level); - return {{},{},{},{}}; - } - - std::vector grids; - grids.resize(num_level); - - std::vector seq2pos; - auto& seq2grid = grids[0].seq2grid; - auto& seq2normal = grids[0].seq2normal; - auto& grid2seq = grids[0].grid2seq; - grids[0].resolution = resolution; - grids[0].stride = 1; - - auto int64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false); - auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false); - - for (int v = 0; v < 3; ++v) { - int num_layers = view_layer_positions[v].size(0); - int height = view_layer_positions[v].size(1); - int width = view_layer_positions[v].size(2); - float* data = view_layer_positions[v].data_ptr(); - float* data_normal = view_layer_normals[v].data_ptr(); - for (int l = 0; l < num_layers; ++l) { - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; ++j) { - float* p = &data[(i * width + j) * 4]; - float* n = &data_normal[(i * width + j) * 3]; - if (p[3] == 0) - continue; - auto k = pos2key(p, resolution); - if (!grid2seq.count(k)) { - int dim = 0; - for (int d = 0; d < 3; ++d) { - if (std::abs(n[d]) > std::abs(n[dim])) - dim = d; - } - dim = (dim + 1) % 3; - grid2seq[k] = seq2grid.size(); - seq2grid.emplace_back(k); - seq2pos.push_back(p[0]); - seq2pos.push_back(p[1]); - seq2pos.push_back(p[2]); - seq2normal.emplace_back(dim); - } - } - } - data += (height * width * 4); - data_normal += (height * width * 3); - } - } - - for (int i = 0; i < num_level - 1; ++i) { - DownsampleGrid(grids[i], grids[i + 1]); - } - - for (int l = 0; l < num_level; ++l) { - grids[l].seq2neighbor.resize(grids[l].seq2grid.size() * 9, -1); - grids[l].num_origin_seq = grids[l].seq2grid.size(); - for (int d = 0; d < 3; ++d) { - NeighborGrid(grids[l], view_layer_positions, d); - } - } - - for (int i = num_level - 2; i >= 0; --i) { - PadGrid(grids[i], grids[i + 1], view_layer_positions); - } - for (int i = grids[0].num_origin_seq; i < grids[0].seq2grid.size(); ++i) { - int k = grids[0].seq2grid[i]; - float p[3]; - key2pos(k, grids[0].resolution, p); - seq2pos.push_back(p[0]); - seq2pos.push_back(p[1]); - seq2pos.push_back(p[2]); - } - - std::vector texture_positions(2); - std::vector grid_neighbors(grids.size()); - std::vector grid_downsamples(grids.size() - 1); - std::vector grid_evencorners(grids.size()); - std::vector grid_oddcorners(grids.size()); - - texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options); - texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options); - float* positions_out_ptr = texture_positions[0].data_ptr(); - memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size()); - positions_out_ptr = texture_positions[1].data_ptr(); - for (int i = 0; i < grids[0].seq2grid.size(); ++i) { - positions_out_ptr[i] = (i < grids[0].num_origin_seq); - } - - for (int i = 0; i < grids.size(); ++i) { - grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options); - long* nptr = grid_neighbors[i].data_ptr(); - for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) { - nptr[j] = grids[i].seq2neighbor[j]; - } - - grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options); - grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options); - long* dptr = grid_evencorners[i].data_ptr(); - for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) { - dptr[j] = grids[i].seq2evencorner[j]; - } - dptr = grid_oddcorners[i].data_ptr(); - for (int j = 0; j < 
grids[i].seq2oddcorner.size(); ++j) { - dptr[j] = grids[i].seq2oddcorner[j]; - } - if (i + 1 < grids.size()) { - grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options); - long* dptr = grid_downsamples[i].data_ptr(); - for (int j = 0; j < grids[i].downsample_seq.size(); ++j) { - dptr[j] = grids[i].downsample_seq[j]; - } - } - - } - return {texture_positions, grid_neighbors, grid_downsamples, grid_evencorners, grid_oddcorners}; -} - -std::vector> build_hierarchy_with_feat( - std::vector view_layer_positions, - std::vector view_layer_normals, - std::vector view_layer_feats, - int num_level, int resolution) -{ - if (view_layer_positions.size() != 3 || num_level < 1) { - printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level); - return {{},{},{},{}}; - } - - std::vector grids; - grids.resize(num_level); - - std::vector seq2pos; - std::vector seq2feat; - auto& seq2grid = grids[0].seq2grid; - auto& seq2normal = grids[0].seq2normal; - auto& grid2seq = grids[0].grid2seq; - grids[0].resolution = resolution; - grids[0].stride = 1; - - auto int64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false); - auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false); - - int feat_channel = 3; - for (int v = 0; v < 3; ++v) { - int num_layers = view_layer_positions[v].size(0); - int height = view_layer_positions[v].size(1); - int width = view_layer_positions[v].size(2); - float* data = view_layer_positions[v].data_ptr(); - float* data_normal = view_layer_normals[v].data_ptr(); - float* data_feat = view_layer_feats[v].data_ptr(); - feat_channel = view_layer_feats[v].size(3); - for (int l = 0; l < num_layers; ++l) { - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; ++j) { - float* p = &data[(i * width + j) * 4]; - float* n = &data_normal[(i * width + j) * 3]; - float* f = &data_feat[(i * width + j) * feat_channel]; - if (p[3] == 0) - continue; - auto k = pos2key(p, resolution); - if (!grid2seq.count(k)) { - int dim = 0; - for (int d = 0; d < 3; ++d) { - if (std::abs(n[d]) > std::abs(n[dim])) - dim = d; - } - dim = (dim + 1) % 3; - grid2seq[k] = seq2grid.size(); - seq2grid.emplace_back(k); - seq2pos.push_back(p[0]); - seq2pos.push_back(p[1]); - seq2pos.push_back(p[2]); - for (int c = 0; c < feat_channel; ++c) { - seq2feat.emplace_back(f[c]); - } - seq2normal.emplace_back(dim); - } - } - } - data += (height * width * 4); - data_normal += (height * width * 3); - data_feat += (height * width * feat_channel); - } - } - - for (int i = 0; i < num_level - 1; ++i) { - DownsampleGrid(grids[i], grids[i + 1]); - } - - for (int l = 0; l < num_level; ++l) { - grids[l].seq2neighbor.resize(grids[l].seq2grid.size() * 9, -1); - grids[l].num_origin_seq = grids[l].seq2grid.size(); - for (int d = 0; d < 3; ++d) { - NeighborGrid(grids[l], view_layer_positions, d); - } - } - - for (int i = num_level - 2; i >= 0; --i) { - PadGrid(grids[i], grids[i + 1], view_layer_positions); - } - for (int i = grids[0].num_origin_seq; i < grids[0].seq2grid.size(); ++i) { - int k = grids[0].seq2grid[i]; - float p[3]; - key2pos(k, grids[0].resolution, p); - seq2pos.push_back(p[0]); - seq2pos.push_back(p[1]); - seq2pos.push_back(p[2]); - for (int c = 0; c < feat_channel; ++c) { - seq2feat.emplace_back(0.5); - } - } - - std::vector texture_positions(2); - std::vector texture_feats(1); - std::vector grid_neighbors(grids.size()); - std::vector grid_downsamples(grids.size() - 1); - std::vector 
grid_evencorners(grids.size()); - std::vector grid_oddcorners(grids.size()); - - texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options); - texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options); - texture_feats[0] = torch::zeros({seq2feat.size() / feat_channel, feat_channel}, float_options); - float* positions_out_ptr = texture_positions[0].data_ptr(); - memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size()); - positions_out_ptr = texture_positions[1].data_ptr(); - for (int i = 0; i < grids[0].seq2grid.size(); ++i) { - positions_out_ptr[i] = (i < grids[0].num_origin_seq); - } - float* feats_out_ptr = texture_feats[0].data_ptr(); - memcpy(feats_out_ptr, seq2feat.data(), sizeof(float) * seq2feat.size()); - - for (int i = 0; i < grids.size(); ++i) { - grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options); - long* nptr = grid_neighbors[i].data_ptr(); - for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) { - nptr[j] = grids[i].seq2neighbor[j]; - } - grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options); - grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options); - long* dptr = grid_evencorners[i].data_ptr(); - for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) { - dptr[j] = grids[i].seq2evencorner[j]; - } - dptr = grid_oddcorners[i].data_ptr(); - for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) { - dptr[j] = grids[i].seq2oddcorner[j]; - } - if (i + 1 < grids.size()) { - grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options); - long* dptr = grid_downsamples[i].data_ptr(); - for (int j = 0; j < grids[i].downsample_seq.size(); ++j) { - dptr[j] = grids[i].downsample_seq[j]; - } - } - } - return {texture_positions, texture_feats, grid_neighbors, grid_downsamples, grid_evencorners, grid_oddcorners}; -} diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp b/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp deleted file mode 100644 index b3ff69f5abe309be2784303d384524774708c2a3..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp +++ /dev/null @@ -1,139 +0,0 @@ -#include "rasterizer.h" - -void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) { - float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0])); - float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0])); - float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1])); - float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1])); - - for (int px = x_min; px < x_max + 1; ++px) { - if (px < 0 || px >= width) - continue; - for (int py = y_min; py < y_max + 1; ++py) { - if (py < 0 || py >= height) - continue; - float vt[2] = {px + 0.5, py + 0.5}; - float baryCentricCoordinate[3]; - calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate); - if (isBarycentricCoordInBounds(baryCentricCoordinate)) { - int pixel = py * width + px; - if (zbuffer == 0) { - zbuffer[pixel] = (INT64)(idx + 1); - continue; - } - - float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2]; - float depth_thres = 0; - if (d) { - depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation; - } - - int z_quantize = depth * (2<<17); - INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1); - if (depth < 
depth_thres) - continue; - zbuffer[pixel] = std::min(zbuffer[pixel], token); - } - } - } -} - -void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces, - float* barycentric_map, int pix) -{ - INT64 f = zbuffer[pix] % MAXINT; - if (f == (MAXINT-1)) { - findices[pix] = 0; - barycentric_map[pix * 3] = 0; - barycentric_map[pix * 3 + 1] = 0; - barycentric_map[pix * 3 + 2] = 0; - return; - } - findices[pix] = f; - f -= 1; - float barycentric[3] = {0, 0, 0}; - if (f >= 0) { - float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f}; - float* vt0_ptr = V + (F[f * 3] * 4); - float* vt1_ptr = V + (F[f * 3 + 1] * 4); - float* vt2_ptr = V + (F[f * 3 + 2] * 4); - - float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f}; - float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f}; - float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f}; - - calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric); - - barycentric[0] = barycentric[0] / vt0_ptr[3]; - barycentric[1] = barycentric[1] / vt1_ptr[3]; - barycentric[2] = barycentric[2] / vt2_ptr[3]; - float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]); - barycentric[0] *= w; - barycentric[1] *= w; - barycentric[2] *= w; - - } - barycentric_map[pix * 3] = barycentric[0]; - barycentric_map[pix * 3 + 1] = barycentric[1]; - barycentric_map[pix * 3 + 2] = barycentric[2]; -} - -void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f) -{ - float* vt0_ptr = V + (F[f * 3] * 4); - float* vt1_ptr = V + (F[f * 3 + 1] * 4); - float* vt2_ptr = V + (F[f * 3 + 2] * 4); - - float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f}; - float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f}; - float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f}; - - rasterizeTriangleCPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc); -} - -std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, - int width, int height, float occlusion_truncation, int use_depth_prior) -{ - int num_faces = F.size(0); - int num_vertices = V.size(0); - auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false); - auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false); - auto findices = torch::zeros({height, width}, options); - INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1); - auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint; - - if (!use_depth_prior) { - for (int i = 0; i < num_faces; ++i) { - rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), 0, - (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i); - } - } else { - for (int i = 0; i < num_faces; ++i) - rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(), - (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i); - } - - auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false); - auto barycentric = torch::zeros({height, width, 3}, float_options); - for (int i = 0; i < width * height; ++i) - barycentricFromImgcoordCPU(V.data_ptr<float>(), F.data_ptr<int>(), - findices.data_ptr<int>(), (INT64*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i); - - return {findices, barycentric}; -} - -std::vector<torch::Tensor> rasterize_image(torch::Tensor V, torch::Tensor F, torch::Tensor D, - int width, int height, float occlusion_truncation, int use_depth_prior) -{ - int device_id = V.get_device(); - if (device_id == -1) - return rasterize_image_cpu(V, F, D, width, height, occlusion_truncation, use_depth_prior); - else - return rasterize_image_gpu(V, F, D, width, height, occlusion_truncation, use_depth_prior); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("rasterize_image", &rasterize_image, "Custom image rasterization"); - m.def("build_hierarchy", &build_hierarchy, "Custom image rasterization"); - m.def("build_hierarchy_with_feat", &build_hierarchy_with_feat, "Custom image rasterization"); -} diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h b/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h deleted file mode 100644 index cf4f9870bda0714763e4236f85293ca7cef7d51f..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef RASTERIZER_H_ -#define RASTERIZER_H_ - -#include <torch/extension.h> -#include <cuda_runtime.h> -#include <vector> -#include <ATen/cuda/CUDAContext.h> // For CUDA context - -#define INT64 unsigned long long -#define MAXINT 2147483647 - -__host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) { - return ((c[0] - a[0]) * (b[1] - a[1]) - (b[0] - a[0]) * (c[1] - a[1])); -} - -__host__ __device__ inline void calculateBarycentricCoordinate(float* a, float* b, float* c, float* p, - float* barycentric) -{ - float beta_tri = calculateSignedArea2(a, p, c); - float gamma_tri = calculateSignedArea2(a, b, p); - float area = calculateSignedArea2(a, b, c); - if (area == 0) { - barycentric[0] = -1.0; - barycentric[1] = -1.0; - barycentric[2] = -1.0; - return; - } - float tri_inv = 1.0 / area; - float beta = beta_tri * tri_inv; - float gamma = gamma_tri * tri_inv; - float alpha = 1.0 - beta - gamma; - barycentric[0] = alpha; - barycentric[1] = beta; - barycentric[2] = gamma; -} - -__host__ __device__ inline bool isBarycentricCoordInBounds(float* barycentricCoord) { - return barycentricCoord[0] >= 0.0 && barycentricCoord[0] <= 1.0 && - barycentricCoord[1] >= 0.0 && barycentricCoord[1] <= 1.0 && - barycentricCoord[2] >= 0.0 && barycentricCoord[2] <= 1.0; -} - -std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, - int width, int height, float occlusion_truncation, int use_depth_prior); - -std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tensor> view_layer_positions, std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution); - -std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat( - std::vector<torch::Tensor> view_layer_positions, - std::vector<torch::Tensor> view_layer_normals, - std::vector<torch::Tensor> view_layer_feats, - int num_level, int resolution); - -#endif \ No newline at end of file diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu b/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu
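A note on the z-buffer scheme shared by the CPU implementation above and the CUDA implementation that follows: each pixel holds a single 64-bit token packing the quantized depth (high part) and the 1-based face index (low part), so one `std::min`/`atomicMin` both depth-tests and records the winning triangle. A minimal Python sketch of that arithmetic, reusing the `MAXINT` and `2<<17` constants from rasterizer.h (the helper names are invented for illustration):

```python
# Sketch only: mirrors the token packing in rasterizeTriangleCPU/GPU.
MAXINT = 2147483647  # same constant as rasterizer.h

def pack_token(depth: float, face_idx: int) -> int:
    z_quantize = int(depth * (2 << 17))           # depth in [0, 1] -> 18-bit integer
    return z_quantize * MAXINT + (face_idx + 1)   # face ids stored 1-based; 0 means "empty"

def unpack_token(token: int):
    return token // MAXINT, token % MAXINT        # (quantized depth, face id + 1)

near, far = pack_token(0.25, face_idx=7), pack_token(0.75, face_idx=3)
assert min(near, far) == near                     # min keeps the closest face per pixel
assert unpack_token(near) == (65536, 8)
```

Because depth occupies the high bits, ordinary integer ordering on tokens is ordering by depth first, which is what lets the GPU path get away with a single `atomicMin` per covered pixel.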
deleted file mode 100644 index 709c1b86a5ee92e3c2ff6ce9df85f1492c3c5378..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu +++ /dev/null @@ -1,127 +0,0 @@ -#include "rasterizer.h" - -__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) { - float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0])); - float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0])); - float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1])); - float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1])); - - for (int px = x_min; px < x_max + 1; ++px) { - if (px < 0 || px >= width) - continue; - for (int py = y_min; py < y_max + 1; ++py) { - if (py < 0 || py >= height) - continue; - float vt[2] = {px + 0.5f, py + 0.5f}; - float baryCentricCoordinate[3]; - calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate); - if (isBarycentricCoordInBounds(baryCentricCoordinate)) { - int pixel = py * width + px; - if (zbuffer == 0) { - atomicExch(&zbuffer[pixel], (INT64)(idx + 1)); - continue; - } - float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2]; - float depth_thres = 0; - if (d) { - depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation; - } - - int z_quantize = depth * (2<<17); - INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1); - if (depth < depth_thres) - continue; - atomicMin(&zbuffer[pixel], token); - } - } - } -} - -__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces, - float* barycentric_map) -{ - int pix = blockIdx.x * blockDim.x + threadIdx.x; - if (pix >= width * height) - return; - INT64 f = zbuffer[pix] % MAXINT; - if (f == (MAXINT-1)) { - findices[pix] = 0; - barycentric_map[pix * 3] = 0; - barycentric_map[pix * 3 + 1] = 0; - barycentric_map[pix * 3 + 2] = 0; - return; - } - findices[pix] = f; - f -= 1; - float barycentric[3] = {0, 0, 0}; - if (f >= 0) { - float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f}; - float* vt0_ptr = V + (F[f * 3] * 4); - float* vt1_ptr = V + (F[f * 3 + 1] * 4); - float* vt2_ptr = V + (F[f * 3 + 2] * 4); - - float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f}; - float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f}; - float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f}; - - calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric); - - barycentric[0] = barycentric[0] / vt0_ptr[3]; - barycentric[1] = barycentric[1] / vt1_ptr[3]; - barycentric[2] = barycentric[2] / vt2_ptr[3]; - float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]); - barycentric[0] *= w; - barycentric[1] *= w; - barycentric[2] *= w; - - } - barycentric_map[pix * 3] = barycentric[0]; - barycentric_map[pix * 3 + 1] = barycentric[1]; - barycentric_map[pix * 3 + 2] = barycentric[2]; -} - -__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces) -{ - int f = blockIdx.x * blockDim.x + threadIdx.x; - if (f >= num_faces) - return; - - float* vt0_ptr = V + 
(F[f * 3] * 4); - float* vt1_ptr = V + (F[f * 3 + 1] * 4); - float* vt2_ptr = V + (F[f * 3 + 2] * 4); - - float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f}; - float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f}; - float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f}; - - rasterizeTriangleGPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc); -} - -std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, - int width, int height, float occlusion_truncation, int use_depth_prior) -{ - int device_id = V.get_device(); - cudaSetDevice(device_id); - int num_faces = F.size(0); - int num_vertices = V.size(0); - auto options = torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false); - auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false); - auto findices = torch::zeros({height, width}, options); - INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1); - auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint; - - if (!use_depth_prior) { - rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), 0, - (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces); - } else { - rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(), - (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces); - } - - auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false); - auto barycentric = torch::zeros({height, width, 3}, float_options); - barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr<float>(), F.data_ptr<int>(), - findices.data_ptr<int>(), (INT64*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>()); - - return {findices, barycentric}; -} diff --git a/hy3dgen/texgen/differentiable_renderer/__init__.py b/hy3dgen/texgen/differentiable_renderer/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644 --- a/hy3dgen/texgen/differentiable_renderer/__init__.py +++ b/hy3dgen/texgen/differentiable_renderer/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -20,4 +10,4 @@ # their software and algorithms, including trained model weights, parameters (including # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, # fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. \ No newline at end of file diff --git a/hy3dgen/texgen/differentiable_renderer/camera_utils.py b/hy3dgen/texgen/differentiable_renderer/camera_utils.py index 289710ab787a174b39154f1010fc6209e4c92dfe..b67727c828662e34d14b44c9fbff9f101815fbc1 100644 --- a/hy3dgen/texgen/differentiable_renderer/camera_utils.py +++ b/hy3dgen/texgen/differentiable_renderer/camera_utils.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.bat b/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.bat new file mode 100644 index 0000000000000000000000000000000000000000..3947b0f03f9f6245dac95db7460703076444a304 --- /dev/null +++ b/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.bat @@ -0,0 +1,3 @@ +FOR /F "tokens=*" %%i IN ('python -m pybind11 --includes') DO SET PYINCLUDES=%%i +echo %PYINCLUDES% +g++ -O3 -Wall -shared -std=c++11 -fPIC %PYINCLUDES% mesh_processor.cpp -o mesh_processor.pyd -lpython3.12 \ No newline at end of file diff --git a/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.sh b/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.sh deleted file mode 100644 index 056be9dc28d7947419f06536799b64f65e4ff827..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.sh +++ /dev/null @@ -1 +0,0 @@ -c++ -O3 -Wall -shared -std=c++11 -fPIC `python3 -m pybind11 --includes` mesh_processor.cpp -o mesh_processor`python3-config --extension-suffix` \ No newline at end of file diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp b/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp index ed6ac6e62637b97bb97373fff30e90e4e2587fd8..ca8650fada02099d3fce0f551fa4f953f278cf34 100644 --- a/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp +++ b/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp @@ -1,10 +1,10 @@ -#include -#include -#include #include #include #include #include +#include +#include +#include namespace py = pybind11; using namespace std; diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpython-311-x86_64-linux-gnu.so b/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpython-311-x86_64-linux-gnu.so deleted file mode 
100644 index 42890fece062ce38cfd31c7fb8beb7138fcdb56e..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpython-311-x86_64-linux-gnu.so and /dev/null differ diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_processor.py b/hy3dgen/texgen/differentiable_renderer/mesh_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..5a731ccea4237c4605f94c7b52ace42d590b6aa0 --- /dev/null +++ b/hy3dgen/texgen/differentiable_renderer/mesh_processor.py @@ -0,0 +1,84 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +import numpy as np + +def meshVerticeInpaint_smooth(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx): + texture_height, texture_width, texture_channel = texture.shape + vtx_num = vtx_pos.shape[0] + + vtx_mask = np.zeros(vtx_num, dtype=np.float32) + vtx_color = [np.zeros(texture_channel, dtype=np.float32) for _ in range(vtx_num)] + uncolored_vtxs = [] + G = [[] for _ in range(vtx_num)] + + for i in range(uv_idx.shape[0]): + for k in range(3): + vtx_uv_idx = uv_idx[i, k] + vtx_idx = pos_idx[i, k] + uv_v = int(round(vtx_uv[vtx_uv_idx, 0] * (texture_width - 1))) + uv_u = int(round((1.0 - vtx_uv[vtx_uv_idx, 1]) * (texture_height - 1))) + if mask[uv_u, uv_v] > 0: + vtx_mask[vtx_idx] = 1.0 + vtx_color[vtx_idx] = texture[uv_u, uv_v] + else: + uncolored_vtxs.append(vtx_idx) + G[pos_idx[i, k]].append(pos_idx[i, (k + 1) % 3]) + + smooth_count = 2 + last_uncolored_vtx_count = 0 + while smooth_count > 0: + uncolored_vtx_count = 0 + for vtx_idx in uncolored_vtxs: + sum_color = np.zeros(texture_channel, dtype=np.float32) + total_weight = 0.0 + vtx_0 = vtx_pos[vtx_idx] + for connected_idx in G[vtx_idx]: + if vtx_mask[connected_idx] > 0: + vtx1 = vtx_pos[connected_idx] + dist = np.sqrt(np.sum((vtx_0 - vtx1) ** 2)) + dist_weight = 1.0 / max(dist, 1e-4) + dist_weight *= dist_weight + sum_color += vtx_color[connected_idx] * dist_weight + total_weight += dist_weight + if total_weight > 0: + vtx_color[vtx_idx] = sum_color / total_weight + vtx_mask[vtx_idx] = 1.0 + else: + uncolored_vtx_count += 1 + + if last_uncolored_vtx_count == uncolored_vtx_count: + smooth_count -= 1 + else: + smooth_count += 1 + last_uncolored_vtx_count = uncolored_vtx_count + + new_texture = texture.copy() + new_mask = mask.copy() + for face_idx in range(uv_idx.shape[0]): + for k in range(3): + vtx_uv_idx = uv_idx[face_idx, k] + vtx_idx = pos_idx[face_idx, k] + if vtx_mask[vtx_idx] == 1.0: + uv_v = int(round(vtx_uv[vtx_uv_idx, 0] * (texture_width - 1))) + uv_u = int(round((1.0 - vtx_uv[vtx_uv_idx, 1]) * (texture_height - 1))) + new_texture[uv_u, uv_v] = vtx_color[vtx_idx] +
new_mask[uv_u, uv_v] = 255 + return new_texture, new_mask + +def meshVerticeInpaint(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx, method="smooth"): + if method == "smooth": + return meshVerticeInpaint_smooth(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx) + else: + raise ValueError("Invalid method. Only 'smooth' is supported.") \ No newline at end of file diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_render.py b/hy3dgen/texgen/differentiable_renderer/mesh_render.py index c85b80e043221282e9ff6bfb81764fb32c5d48ed..30049efbdcce375fa13fbae26d6b9da67e21c3cb 100644 --- a/hy3dgen/texgen/differentiable_renderer/mesh_render.py +++ b/hy3dgen/texgen/differentiable_renderer/mesh_render.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_utils.py b/hy3dgen/texgen/differentiable_renderer/mesh_utils.py index ca0ba1a6145c68651ec033b97e80900cd2c9d7ec..fa5694a66a501262b3d50acdf1340d0fa8487dec 100644 --- a/hy3dgen/texgen/differentiable_renderer/mesh_utils.py +++ b/hy3dgen/texgen/differentiable_renderer/mesh_utils.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/differentiable_renderer/setup.py b/hy3dgen/texgen/differentiable_renderer/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..1bfdb10a559dc63f56502f45efdef0470dd41bc5 --- /dev/null +++ b/hy3dgen/texgen/differentiable_renderer/setup.py @@ -0,0 +1,62 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations.
+ +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +from setuptools import setup, Extension +import pybind11 +import sys +import platform + +def get_platform_specific_args(): + system = platform.system().lower() + cpp_std = 'c++14' # Make configurable if needed + + if sys.platform == 'win32': + compile_args = ['/O2', f'/std:{cpp_std}', '/EHsc', '/MP', '/DWIN32_LEAN_AND_MEAN', '/bigobj'] + link_args = [] + extra_includes = [] + elif system == 'linux': + compile_args = ['-O3', f'-std={cpp_std}', '-fPIC', '-Wall', '-Wextra', '-pthread'] + link_args = ['-fPIC', '-pthread'] + extra_includes = [] + elif sys.platform == 'darwin': + compile_args = ['-O3', f'-std={cpp_std}', '-fPIC', '-Wall', '-Wextra', + '-stdlib=libc++', '-mmacosx-version-min=10.14'] + link_args = ['-fPIC', '-stdlib=libc++', '-mmacosx-version-min=10.14', '-dynamiclib'] + extra_includes = [] + else: + raise RuntimeError(f"Unsupported platform: {system}") + + return compile_args, link_args, extra_includes + +extra_compile_args, extra_link_args, platform_includes = get_platform_specific_args() +include_dirs = [pybind11.get_include(), pybind11.get_include(user=True)] +include_dirs.extend(platform_includes) + +ext_modules = [ + Extension( + "mesh_processor", + ["mesh_processor.cpp"], + include_dirs=include_dirs, + language='c++', + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + ), +] + +setup( + name="mesh_processor", + ext_modules=ext_modules, + install_requires=['pybind11>=2.6.0'], + python_requires='>=3.6', +) \ No newline at end of file diff --git a/hy3dgen/texgen/hunyuanpaint/__init__.py b/hy3dgen/texgen/hunyuanpaint/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644 --- a/hy3dgen/texgen/hunyuanpaint/__init__.py +++ b/hy3dgen/texgen/hunyuanpaint/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -20,4 +10,4 @@ # their software and algorithms, including trained model weights, parameters (including # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, # fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
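For context on the two differentiable_renderer additions above: mesh_processor.py is a pure-Python fallback for the C++ mesh_processor extension, and the new setup.py builds the native version (`python setup.py build_ext --inplace`). Both expose the same `meshVerticeInpaint` entry point, which propagates colors from masked texels to uncolored vertices with inverse-distance-squared weighting. A hedged usage sketch; the one-triangle mesh and seed texel below are invented for illustration:

```python
# Sketch only: exercises the pure-Python fallback added in this patch.
import numpy as np
from hy3dgen.texgen.differentiable_renderer.mesh_processor import meshVerticeInpaint

texture = np.zeros((8, 8, 3), dtype=np.float32)
mask = np.zeros((8, 8), dtype=np.uint8)
texture[0, 0] = (1.0, 0.0, 0.0)   # one known texel seeds the propagation
mask[0, 0] = 255

vtx_pos = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]], dtype=np.float32)
vtx_uv = np.array([[0.0, 1.0], [1.0, 1.0], [0.0, 0.0]], dtype=np.float32)
pos_idx = np.array([[0, 1, 2]], dtype=np.int64)
uv_idx = np.array([[0, 1, 2]], dtype=np.int64)

# Colors flow along mesh edges from colored to uncolored vertices (1/d^2 weights),
# then the recovered vertex colors are splatted back into the texture and mask.
new_texture, new_mask = meshVerticeInpaint(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx)
```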
\ No newline at end of file diff --git a/hy3dgen/texgen/hunyuanpaint/pipeline.py b/hy3dgen/texgen/hunyuanpaint/pipeline.py index 436ce34efb8bc40c3df2b3902b7a29dffa39ae91..7a0c8a19604f969b20e0ffc27f6f33820a10e99a 100644 --- a/hy3dgen/texgen/hunyuanpaint/pipeline.py +++ b/hy3dgen/texgen/hunyuanpaint/pipeline.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -119,6 +109,8 @@ class HunyuanPaintPipeline(StableDiffusionPipeline): return_dict=True, **cached_condition, ): + device = self._execution_device + if image is None: raise ValueError("Inputting embeddings not supported for this pipeline. Please pass an image.") assert not isinstance(image, torch.Tensor) @@ -127,7 +119,7 @@ class HunyuanPaintPipeline(StableDiffusionPipeline): image_vae = torch.tensor(np.array(image) / 255.0) image_vae = image_vae.unsqueeze(0).permute(0, 3, 1, 2).unsqueeze(0) - image_vae = image_vae.to(device=self.vae.device, dtype=self.vae.dtype) + image_vae = image_vae.to(device=device, dtype=self.vae.dtype) batch_size = image_vae.shape[0] assert batch_size == 1 @@ -171,13 +163,13 @@ class HunyuanPaintPipeline(StableDiffusionPipeline): camera_info = cached_condition['camera_info_gen'] # B,N if isinstance(camera_info, List): camera_info = torch.tensor(camera_info) - camera_info = camera_info.to(image_vae.device).to(torch.int64) + camera_info = camera_info.to(device).to(torch.int64) cached_condition['camera_info_gen'] = camera_info if 'camera_info_ref' in cached_condition: camera_info = cached_condition['camera_info_ref'] # B,N if isinstance(camera_info, List): camera_info = torch.tensor(camera_info) - camera_info = camera_info.to(image_vae.device).to(torch.int64) + camera_info = camera_info.to(device).to(torch.int64) cached_condition['camera_info_ref'] = camera_info cached_condition['ref_latents'] = ref_latents diff --git a/hy3dgen/texgen/hunyuanpaint/unet/__init__.py b/hy3dgen/texgen/hunyuanpaint/unet/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644 --- a/hy3dgen/texgen/hunyuanpaint/unet/__init__.py +++ b/hy3dgen/texgen/hunyuanpaint/unet/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. 
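The pipeline.py hunks above replace `self.vae.device` with `self._execution_device` throughout. The distinction matters once model CPU offload is enabled (the hook is added to Hunyuan3DPaintPipeline later in this diff): offloaded submodules report device `cpu` between calls, while `_execution_device` is where diffusers actually stages inputs. A sketch of the behavior, using a stock diffusers pipeline and an illustrative model id rather than anything from this patch:

```python
# Sketch only: why _execution_device is the safer target under offload.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()

print(pipe.vae.device)         # cpu  - weights can sit on CPU until the VAE runs
print(pipe._execution_device)  # cuda:0 - where inputs should be moved
```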
diff --git a/hy3dgen/texgen/hunyuanpaint/unet/__init__.py b/hy3dgen/texgen/hunyuanpaint/unet/__init__.py
index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644
--- a/hy3dgen/texgen/hunyuanpaint/unet/__init__.py
+++ b/hy3dgen/texgen/hunyuanpaint/unet/__init__.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -20,4 +10,4 @@
 # their software and algorithms, including trained model weights, parameters (including
 # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
 # fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
+# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
\ No newline at end of file
diff --git a/hy3dgen/texgen/hunyuanpaint/unet/modules.py b/hy3dgen/texgen/hunyuanpaint/unet/modules.py
index 5d16bc6b6bb1ebc72c602dcb298d122429fe847d..e2ee269a74fa3941ffaacf8401b4c1a5935b74a7 100644
--- a/hy3dgen/texgen/hunyuanpaint/unet/modules.py
+++ b/hy3dgen/texgen/hunyuanpaint/unet/modules.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -22,7 +12,6 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-
 import copy
 import json
 import os
diff --git a/hy3dgen/texgen/pipelines.py b/hy3dgen/texgen/pipelines.py
index 7aaa0390e01a21513089b5d5bb6bd35eafb2cecd..7f2eb9d69ff9b1f8ecba0c2b7c963d8f2ac442ba 100644
--- a/hy3dgen/texgen/pipelines.py
+++ b/hy3dgen/texgen/pipelines.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -24,15 +14,16 @@
 
 import logging
-import os
-
 import numpy as np
+import os
 import torch
 from PIL import Image
+from typing import Union, Optional
 
 from .differentiable_renderer.mesh_render import MeshRender
 from .utils.dehighlight_utils import Light_Shadow_Remover
 from .utils.multiview_utils import Multiview_Diffusion_Net
+from .utils.imagesuper_utils import Image_Super_Net
 from .utils.uv_warp_utils import mesh_uv_wrap
 
 logger = logging.getLogger(__name__)
 
@@ -50,7 +41,7 @@ class Hunyuan3DTexGenConfig:
         self.candidate_view_weights = [1, 0.1, 0.5, 0.1, 0.05, 0.05]
 
         self.render_size = 2048
-        self.texture_size = 1024
+        self.texture_size = 2048
         self.bake_exp = 4
         self.merge_method = 'fast'
 
@@ -77,7 +68,6 @@ class Hunyuan3DPaintPipeline:
                                                allow_patterns=["hunyuan3d-paint-v2-0/*"])
             delight_model_path = os.path.join(model_path, 'hunyuan3d-delight-v2-0')
             multiview_model_path = os.path.join(model_path, 'hunyuan3d-paint-v2-0')
-
             return cls(Hunyuan3DTexGenConfig(delight_model_path, multiview_model_path))
         except ImportError:
             logger.warning(
@@ -104,6 +94,11 @@ class Hunyuan3DPaintPipeline:
         # Load model
         self.models['delight_model'] = Light_Shadow_Remover(self.config)
         self.models['multiview_model'] = Multiview_Diffusion_Net(self.config)
+        # self.models['super_model'] = Image_Super_Net(self.config)
+
+    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+        self.models['delight_model'].pipeline.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
+        self.models['multiview_model'].pipeline.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
 
     def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True):
         normal_maps = []
@@ -150,6 +145,40 @@ class Hunyuan3DPaintPipeline:
 
         return texture
 
+    def recenter_image(self, image, border_ratio=0.2):
+        if image.mode == 'RGB':
+            return image
+        elif image.mode == 'L':
+            image = image.convert('RGB')
+            return image
+
+        alpha_channel = np.array(image)[:, :, 3]
+        non_zero_indices = np.argwhere(alpha_channel > 0)
+        if non_zero_indices.size == 0:
+            raise ValueError("Image is fully transparent")
+
+        min_row, min_col = non_zero_indices.min(axis=0)
+        max_row, max_col = non_zero_indices.max(axis=0)
+
+        cropped_image = image.crop((min_col, min_row, max_col + 1, max_row + 1))
+
+        width, height = cropped_image.size
+        border_width = int(width * border_ratio)
+        border_height = int(height * border_ratio)
+
+        new_width = width + 2 * border_width
+        new_height = height + 2 * border_height
+
+        square_size = max(new_width, new_height)
+
+        new_image = Image.new('RGBA', (square_size, square_size), (255, 255, 255, 0))
+
+        paste_x = (square_size - new_width) // 2 + border_width
+        paste_y = (square_size - new_height) // 2 + border_height
+
+        new_image.paste(cropped_image, (paste_x, paste_y))
+        return new_image
+
     @torch.no_grad()
     def __call__(self, mesh, image):
 
@@ -158,6 +187,8 @@ class Hunyuan3DPaintPipeline:
         else:
             image_prompt = image
 
+        image_prompt = self.recenter_image(image_prompt)
+
         image_prompt = self.models['delight_model'](image_prompt)
 
         mesh = mesh_uv_wrap(mesh)
@@ -178,6 +209,7 @@ class Hunyuan3DPaintPipeline:
         multiviews = self.models['multiview_model'](image_prompt, normal_maps + position_maps, camera_info)
 
         for i in range(len(multiviews)):
+            # multiviews[i] = self.models['super_model'](multiviews[i])
             multiviews[i] = multiviews[i].resize(
                 (self.config.render_size, self.config.render_size))
 
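A hedged usage sketch of the two additions to `Hunyuan3DPaintPipeline`. The repo ID and input path are illustrative; `recenter_image` crops an RGBA input to its alpha bounding box, pads it by `border_ratio`, and returns a square canvas, while `enable_model_cpu_offload` simply forwards to both wrapped diffusers pipelines:

```python
from PIL import Image

# Repo ID assumed from the surrounding project; from_pretrained is defined above.
paint = Hunyuan3DPaintPipeline.from_pretrained('tencent/Hunyuan3D-2')
paint.enable_model_cpu_offload()  # run the texture stage in less VRAM

img = Image.open('assets/example_images/004.png')        # RGBA example input
centred = paint.recenter_image(img, border_ratio=0.2)    # no-op for RGB/L inputs
print(centred.size[0] == centred.size[1])                # True: square canvas
```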
diff --git a/hy3dgen/texgen/utils/__init__.py b/hy3dgen/texgen/utils/__init__.py
index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644
--- a/hy3dgen/texgen/utils/__init__.py
+++ b/hy3dgen/texgen/utils/__init__.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -20,4 +10,4 @@
 # their software and algorithms, including trained model weights, parameters (including
 # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
 # fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
+# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
\ No newline at end of file
diff --git a/hy3dgen/texgen/utils/alignImg4Tex_utils.py b/hy3dgen/texgen/utils/alignImg4Tex_utils.py
index 0a09c17cfe1a3f1ac850688e96b66341f0226418..34df2041e598b2cbf92a345f4d003c53437dfb92 100644
--- a/hy3dgen/texgen/utils/alignImg4Tex_utils.py
+++ b/hy3dgen/texgen/utils/alignImg4Tex_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -22,7 +12,6 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-
 import torch
 from diffusers import EulerAncestralDiscreteScheduler
 from diffusers import StableDiffusionControlNetPipeline, StableDiffusionXLControlNetImg2ImgPipeline, ControlNetModel, \
diff --git a/hy3dgen/texgen/utils/counter_utils.py b/hy3dgen/texgen/utils/counter_utils.py
index e0374fc327ad2127ec84bb0c267c19a3b9c8d738..383a5151cb22e1c965a8432cc5a0ac312eaa9ab2 100644
--- a/hy3dgen/texgen/utils/counter_utils.py
+++ b/hy3dgen/texgen/utils/counter_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
diff --git a/hy3dgen/texgen/utils/dehighlight_utils.py b/hy3dgen/texgen/utils/dehighlight_utils.py
index d9724aef4f0f8057473ea26e1fe248592b616608..6e6105f4537d3c9be98b1b1a33820178c262503e 100644
--- a/hy3dgen/texgen/utils/dehighlight_utils.py
+++ b/hy3dgen/texgen/utils/dehighlight_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -44,6 +34,33 @@ class Light_Shadow_Remover():
         pipeline.set_progress_bar_config(disable=True)
 
         self.pipeline = pipeline.to(self.device, torch.float16)
+
+    def recorrect_rgb(self, src_image, target_image, alpha_channel, scale=0.95):
+
+        def flat_and_mask(bgr, a):
+            mask = torch.where(a > 0.5, True, False)
+            bgr_flat = bgr.reshape(-1, bgr.shape[-1])
+            mask_flat = mask.reshape(-1)
+            bgr_flat_masked = bgr_flat[mask_flat, :]
+            return bgr_flat_masked
+
+        src_flat = flat_and_mask(src_image, alpha_channel)
+        target_flat = flat_and_mask(target_image, alpha_channel)
+        corrected_bgr = torch.zeros_like(src_image)
+
+        for i in range(3):
+            src_mean, src_stddev = torch.mean(src_flat[:, i]), torch.std(src_flat[:, i])
+            target_mean, target_stddev = torch.mean(target_flat[:, i]), torch.std(target_flat[:, i])
+            corrected_bgr[:, :, i] = torch.clamp((src_image[:, :, i] - scale * src_mean) * (target_stddev / src_stddev) + scale * target_mean, 0, 1)
+
+        src_mse = torch.mean((src_image - target_image) ** 2)
+        modify_mse = torch.mean((corrected_bgr - target_image) ** 2)
+        if src_mse < modify_mse:
+            corrected_bgr = torch.cat([src_image, alpha_channel], dim=-1)
+        else:
+            corrected_bgr = torch.cat([corrected_bgr, alpha_channel], dim=-1)
+
+        return corrected_bgr
 
     @torch.no_grad()
     def __call__(self, image):
@@ -81,4 +98,10 @@ class Light_Shadow_Remover():
             guidance_scale=self.cfg_text,
         ).images[0]
 
+        image_tensor = torch.tensor(np.array(image)/255.0).to(self.device)
+        rgb_src = image_tensor[:,:,:3]
+        image = self.recorrect_rgb(rgb_src, rgb_target, alpha)
+        image = image[:,:,:3]*image[:,:,3:] + torch.ones_like(image[:,:,:3])*(1.0-image[:,:,3:])
+        image = Image.fromarray((image.cpu().numpy()*255).astype(np.uint8))
+
         return image
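The core of `recorrect_rgb` is per-channel statistic matching: each source channel is shifted and rescaled so its mean and standard deviation track the target's (with `scale=1.0` this is classic Reinhard-style color transfer; the method above softens it with `scale=0.95` and keeps whichever result has lower MSE). A self-contained toy check on synthetic tensors:

```python
import torch

src = torch.rand(64, 64)              # one colour channel in [0, 1]
tgt = torch.rand(64, 64) * 0.4 + 0.3  # target with different statistics

# Same transform recorrect_rgb applies per channel, here with scale = 1.0.
out = torch.clamp((src - src.mean()) * (tgt.std() / src.std()) + tgt.mean(), 0, 1)

print(round(out.mean().item(), 3), round(tgt.mean().item(), 3))  # ~equal
print(round(out.std().item(), 3), round(tgt.std().item(), 3))    # ~equal
```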
diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/__init__.py b/hy3dgen/texgen/utils/imagesuper_utils.py
similarity index 53%
rename from hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/__init__.py
rename to hy3dgen/texgen/utils/imagesuper_utils.py
index df40dcc8d4819eb903263ff1faf70ce902eb7e07..0b893c53a4571b4d7eb5adf7b199e4152c0a227b 100644
--- a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/__init__.py
+++ b/hy3dgen/texgen/utils/imagesuper_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -22,11 +12,23 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-'''
-from .hierarchy import BuildHierarchy, BuildHierarchyWithColor
-from .io_obj import LoadObj, LoadObjWithTexture
-from .render import rasterize, interpolate
-'''
-from .io_glb import *
-from .io_obj import *
-from .render import *
+import torch
+from diffusers import StableDiffusionUpscalePipeline
+
+class Image_Super_Net():
+    def __init__(self, config):
+        self.up_pipeline_x4 = StableDiffusionUpscalePipeline.from_pretrained(
+            'stabilityai/stable-diffusion-x4-upscaler',
+            torch_dtype=torch.float16,
+        ).to(config.device)
+        self.up_pipeline_x4.set_progress_bar_config(disable=True)
+
+    def __call__(self, image, prompt=''):
+        with torch.no_grad():
+            upscaled_image = self.up_pipeline_x4(
+                prompt=[prompt],
+                image=image,
+                num_inference_steps=5,
+            ).images[0]
+
+        return upscaled_image
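A standalone sketch of this optional super-resolution step (it is wired in but commented out in the paint pipeline above). The model ID is the one the class uses; the input file name is illustrative, and 5 inference steps trades quality for speed:

```python
import torch
from diffusers import StableDiffusionUpscalePipeline
from PIL import Image

pipe = StableDiffusionUpscalePipeline.from_pretrained(
    'stabilityai/stable-diffusion-x4-upscaler', torch_dtype=torch.float16,
).to('cuda')
pipe.set_progress_bar_config(disable=True)

low_res = Image.open('view.png').convert('RGB').resize((128, 128))
up = pipe(prompt=[''], image=low_res, num_inference_steps=5).images[0]
print(up.size)  # 4x in each dimension: (512, 512)
```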
diff --git a/hy3dgen/texgen/utils/multiview_utils.py b/hy3dgen/texgen/utils/multiview_utils.py
index de10b6b9dbe8667be1905c7f4039b6cc28d39bcd..121c04947d1bd18d8eb48c01e218d6637509450d 100644
--- a/hy3dgen/texgen/utils/multiview_utils.py
+++ b/hy3dgen/texgen/utils/multiview_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
diff --git a/hy3dgen/texgen/utils/simplify_mesh_utils.py b/hy3dgen/texgen/utils/simplify_mesh_utils.py
index 915284d337e648c57fae886dee3333c0203856b6..5c239993dd1498feb1d95e32f6b0df4a3e88da1e 100644
--- a/hy3dgen/texgen/utils/simplify_mesh_utils.py
+++ b/hy3dgen/texgen/utils/simplify_mesh_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
diff --git a/hy3dgen/texgen/utils/uv_warp_utils.py b/hy3dgen/texgen/utils/uv_warp_utils.py
index b14bc7e58a089e2dc1c74242c527a539a05d8478..f55a924f16aa6b2dce3347f39ff7e8ef190065f4 100644
--- a/hy3dgen/texgen/utils/uv_warp_utils.py
+++ b/hy3dgen/texgen/utils/uv_warp_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -30,8 +20,8 @@ def mesh_uv_wrap(mesh):
     if isinstance(mesh, trimesh.Scene):
         mesh = mesh.dump(concatenate=True)
 
-    if len(mesh.faces) > 50000:
-        raise ValueError("The mesh has more than 50,000 faces, which is not supported.")
+    if len(mesh.faces) > 500000000:
+        raise ValueError("The mesh has more than 500,000,000 faces, which is not supported.")
 
     vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
 
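A hedged usage sketch for the unwrap helper above. The mesh file name is illustrative, and the assumption that the helper returns the remapped mesh follows the call site in pipelines.py (`mesh = mesh_uv_wrap(mesh)`); with the face limit raised to 500,000,000, the cap is effectively gone for practical inputs:

```python
import trimesh
from hy3dgen.texgen.utils.uv_warp_utils import mesh_uv_wrap

mesh = trimesh.load('demo.glb', force='mesh')  # Scenes are flattened inside
mesh = mesh_uv_wrap(mesh)                      # xatlas computes the UV atlas
print(mesh.visual.uv.shape)                    # assumed: (n_vertices, 2) UVs
```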
diff --git a/hy3dgen/text2image.py b/hy3dgen/text2image.py
index bea52937bea4fffa306f34031f594e26c1ff27a4..2c8a3ab373750ab8ab12a497f15a03179f49ea07 100644
--- a/hy3dgen/text2image.py
+++ b/hy3dgen/text2image.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -22,7 +12,6 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-
 import os
 import random
 
@@ -80,9 +69,9 @@ class HunyuanDiTPipeline:
         generator = torch.Generator(device=self.pipe.device)
         generator = generator.manual_seed(int(seed))
         out_img = self.pipe(
-            prompt=self.pos_txt+prompt,
+            prompt=prompt[:60] + self.pos_txt,
            negative_prompt=self.neg_txt,
-            num_inference_steps=20,
+            num_inference_steps=25,
             pag_scale=1.3,
             width=1024,
             height=1024,
diff --git a/requirements.txt b/requirements.txt
index 70b0d154fde03b11b4d54117f8c8fb6c45385b21..abdab84e043a82dc588767e52e925971dd183d4f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,36 +1,40 @@
-gradio_litmodel3d
 ninja
 pybind11
-trimesh
+
 diffusers
-tqdm
 einops
 opencv-python
 numpy
 torch
 transformers
 torchvision
-torchaudio
-ConfigArgParse
-xatlas
-scikit-learn
-scikit-image
-tritonclient
-gevent
-geventhttpclient
-facexlib
-accelerate
-ipdb
+#taming-transformers-rom1504
+#ConfigArgParse
+#ipdb
 omegaconf
+
+#sentencepiece
+tqdm
+
+# Mesh Processing
+trimesh
 pymeshlab
-pytorch_lightning
-taming-transformers-rom1504
-kornia
-rembg
-onnxruntime
 pygltflib
-sentencepiece
+xatlas
+#kornia
+#facexlib
+
+# Training
+accelerate
+#pytorch_lightning
+#scikit-learn
+#scikit-image
+
+# Demo only
 gradio
+fastapi
 uvicorn
-fastapi==0.112.2
-wheel
+rembg
+onnxruntime
+#gevent
+#geventhttpclient
\ No newline at end of file
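A hedged sketch of the retuned text-to-image call; the constructor signature and model path are assumptions, not shown in this diff. The point of the change is ordering: the user prompt is clipped to 60 characters *before* the positive-quality suffix (`self.pos_txt`) is appended, so a long prompt can no longer push the suffix past the text encoder's token limit:

```python
from hy3dgen.text2image import HunyuanDiTPipeline

# Model path assumed; adjust to whatever checkpoint the class actually loads.
t2i = HunyuanDiTPipeline("Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled")
image = t2i("a lovely rabbit eating carrots")  # 1024x1024 PIL image
image.save("t2i_demo.png")
```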