diff --git a/.gitignore b/.gitignore index e1958f72f28f9a0c64b98eae9a18346fc604c39e..8e994243ec63c9302abe0c5e8c660148385ee5ef 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ __pycache__/ *.py[cod] *$py.class - +.DS_Store # C extensions *.so diff --git a/README.md b/README.md index 6a7d28e5bc1eead243670220e6d55990f74e47ce..e1378d3b8761bdb8254d0f2c7df2983a4820cb97 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ colorFrom: purple colorTo: red sdk: gradio sdk_version: 4.44.1 -app_file: hg_app.py +app_file: gradio_app.py pinned: false short_description: Text-to-3D and Image-to-3D Generation models: diff --git a/assets/env_maps/gradient.jpg b/assets/env_maps/gradient.jpg new file mode 100644 index 0000000000000000000000000000000000000000..55546c1f260daa7d3c6eef36b70fe5d7e1697df0 Binary files /dev/null and b/assets/env_maps/gradient.jpg differ diff --git a/assets/env_maps/white.jpg b/assets/env_maps/white.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f7af1237608dd1d486edb9298c04fbe15ec74185 Binary files /dev/null and b/assets/env_maps/white.jpg differ diff --git a/assets/example_images/004.png b/assets/example_images/004.png old mode 100644 new mode 100755 diff --git a/assets/example_images/052.png b/assets/example_images/052.png old mode 100644 new mode 100755 diff --git a/assets/example_images/073.png b/assets/example_images/073.png old mode 100644 new mode 100755 diff --git a/assets/example_images/075.png b/assets/example_images/075.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1008.png b/assets/example_images/1008.png old mode 100644 new mode 100755 diff --git a/assets/example_images/101.png b/assets/example_images/101.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1022.png b/assets/example_images/1022.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1029.png b/assets/example_images/1029.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1037.png b/assets/example_images/1037.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1079.png b/assets/example_images/1079.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1111.png b/assets/example_images/1111.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1123.png b/assets/example_images/1123.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1128.png b/assets/example_images/1128.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1135.png b/assets/example_images/1135.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1146.png b/assets/example_images/1146.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1148.png b/assets/example_images/1148.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1154.png b/assets/example_images/1154.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1180.png b/assets/example_images/1180.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1196.png b/assets/example_images/1196.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1204.png b/assets/example_images/1204.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1234.png b/assets/example_images/1234.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1310.png b/assets/example_images/1310.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1316.png 
b/assets/example_images/1316.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1354.png b/assets/example_images/1354.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1429.png b/assets/example_images/1429.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1493.png b/assets/example_images/1493.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1582.png b/assets/example_images/1582.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1583.png b/assets/example_images/1583.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1596.png b/assets/example_images/1596.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1601.png b/assets/example_images/1601.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1603.png b/assets/example_images/1603.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1626.png b/assets/example_images/1626.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1627.png b/assets/example_images/1627.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1654.png b/assets/example_images/1654.png old mode 100644 new mode 100755 diff --git a/assets/example_images/167.png b/assets/example_images/167.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1670.png b/assets/example_images/1670.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1679.png b/assets/example_images/1679.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1687.png b/assets/example_images/1687.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1698.png b/assets/example_images/1698.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1715.png b/assets/example_images/1715.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1735.png b/assets/example_images/1735.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1738.png b/assets/example_images/1738.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1744.png b/assets/example_images/1744.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1758.png b/assets/example_images/1758.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1772.png b/assets/example_images/1772.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1773.png b/assets/example_images/1773.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1778.png b/assets/example_images/1778.png old mode 100644 new mode 100755 diff --git a/assets/example_images/179.png b/assets/example_images/179.png old mode 100644 new mode 100755 diff --git a/assets/example_images/1898.png b/assets/example_images/1898.png old mode 100644 new mode 100755 diff --git a/assets/example_images/191.png b/assets/example_images/191.png old mode 100644 new mode 100755 diff --git a/assets/example_images/195.png b/assets/example_images/195.png old mode 100644 new mode 100755 diff --git a/assets/example_images/197.png b/assets/example_images/197.png old mode 100644 new mode 100755 diff --git a/assets/example_images/198.png b/assets/example_images/198.png old mode 100644 new mode 100755 diff --git a/assets/example_images/202.png b/assets/example_images/202.png old mode 100644 new mode 100755 diff --git a/assets/example_images/203.png b/assets/example_images/203.png old mode 100644 new mode 100755 diff --git 
a/assets/example_images/218.png b/assets/example_images/218.png old mode 100644 new mode 100755 diff --git a/assets/example_images/219.png b/assets/example_images/219.png old mode 100644 new mode 100755 diff --git a/assets/example_images/379.png b/assets/example_images/379.png old mode 100644 new mode 100755 diff --git a/assets/example_images/380.png b/assets/example_images/380.png old mode 100644 new mode 100755 diff --git a/assets/example_images/419.png b/assets/example_images/419.png old mode 100644 new mode 100755 diff --git a/assets/example_images/583.png b/assets/example_images/583.png old mode 100644 new mode 100755 diff --git a/assets/example_images/888.png b/assets/example_images/888.png old mode 100644 new mode 100755 diff --git a/assets/example_images/895.png b/assets/example_images/895.png old mode 100644 new mode 100755 diff --git a/assets/example_images/example_000.png b/assets/example_images/example_000.png old mode 100644 new mode 100755 diff --git a/assets/example_images/example_002.png b/assets/example_images/example_002.png old mode 100644 new mode 100755 diff --git a/assets/example_mv_images/1/back.png b/assets/example_mv_images/1/back.png new file mode 100644 index 0000000000000000000000000000000000000000..b4e05098bbb96c51c6a4413957d91d8385670bd3 Binary files /dev/null and b/assets/example_mv_images/1/back.png differ diff --git a/assets/example_mv_images/1/front.png b/assets/example_mv_images/1/front.png new file mode 100644 index 0000000000000000000000000000000000000000..1417f8cf567224b8c56dc24da659184b3fb89bc9 Binary files /dev/null and b/assets/example_mv_images/1/front.png differ diff --git a/assets/example_mv_images/1/left.png b/assets/example_mv_images/1/left.png new file mode 100644 index 0000000000000000000000000000000000000000..ba76d3154b97be521d24d43a544563476a7b947c Binary files /dev/null and b/assets/example_mv_images/1/left.png differ diff --git a/assets/example_mv_images/10/back.png b/assets/example_mv_images/10/back.png new file mode 100644 index 0000000000000000000000000000000000000000..eef6ab06e7f3607a816717c58cad5e2bf9e2492a Binary files /dev/null and b/assets/example_mv_images/10/back.png differ diff --git a/assets/example_mv_images/10/front.png b/assets/example_mv_images/10/front.png new file mode 100644 index 0000000000000000000000000000000000000000..dda89b7a81936ffbc044d14d92086a73aaa7d80b Binary files /dev/null and b/assets/example_mv_images/10/front.png differ diff --git a/assets/example_mv_images/10/left.png b/assets/example_mv_images/10/left.png new file mode 100644 index 0000000000000000000000000000000000000000..e0579df655cbde20373e07ff9a219d58cd59910f Binary files /dev/null and b/assets/example_mv_images/10/left.png differ diff --git a/assets/example_mv_images/11/back.png b/assets/example_mv_images/11/back.png new file mode 100644 index 0000000000000000000000000000000000000000..b586caf6a15a34f66d649207e761b388c68cde77 Binary files /dev/null and b/assets/example_mv_images/11/back.png differ diff --git a/assets/example_mv_images/11/front.png b/assets/example_mv_images/11/front.png new file mode 100644 index 0000000000000000000000000000000000000000..595f9d6efa05e62469411286f9e126cc8c378f59 Binary files /dev/null and b/assets/example_mv_images/11/front.png differ diff --git a/assets/example_mv_images/11/left.png b/assets/example_mv_images/11/left.png new file mode 100644 index 0000000000000000000000000000000000000000..e83eccfdfe06c8667cf93a81dec0e7368d915336 Binary files /dev/null and b/assets/example_mv_images/11/left.png differ diff --git 
a/assets/example_mv_images/12/back.png b/assets/example_mv_images/12/back.png new file mode 100644 index 0000000000000000000000000000000000000000..c49e0fc43471bd5d2f070044958226b712c53258 Binary files /dev/null and b/assets/example_mv_images/12/back.png differ diff --git a/assets/example_mv_images/12/front.png b/assets/example_mv_images/12/front.png new file mode 100644 index 0000000000000000000000000000000000000000..148cd51aa47b0cf81fe30a2be09e581d78803a67 Binary files /dev/null and b/assets/example_mv_images/12/front.png differ diff --git a/assets/example_mv_images/12/left.png b/assets/example_mv_images/12/left.png new file mode 100644 index 0000000000000000000000000000000000000000..4fbdb357503ef9140679beb977dab84b6fbae925 Binary files /dev/null and b/assets/example_mv_images/12/left.png differ diff --git a/assets/example_mv_images/13/back.png b/assets/example_mv_images/13/back.png new file mode 100644 index 0000000000000000000000000000000000000000..26685f594bdac0907d4897c59bc51c299c354abc Binary files /dev/null and b/assets/example_mv_images/13/back.png differ diff --git a/assets/example_mv_images/13/front.png b/assets/example_mv_images/13/front.png new file mode 100644 index 0000000000000000000000000000000000000000..95053ac3ebf258b5ec9840ba1efc64d8eb853621 Binary files /dev/null and b/assets/example_mv_images/13/front.png differ diff --git a/assets/example_mv_images/13/left.png b/assets/example_mv_images/13/left.png new file mode 100644 index 0000000000000000000000000000000000000000..34fe6634efaa68963f54f1c96c0ef3a9ce324cff Binary files /dev/null and b/assets/example_mv_images/13/left.png differ diff --git a/assets/example_mv_images/14/back.png b/assets/example_mv_images/14/back.png new file mode 100644 index 0000000000000000000000000000000000000000..1a48313bb0d002c8b4328f446c85ab98d119306a Binary files /dev/null and b/assets/example_mv_images/14/back.png differ diff --git a/assets/example_mv_images/14/front.png b/assets/example_mv_images/14/front.png new file mode 100644 index 0000000000000000000000000000000000000000..3b58dfa99c4c574df6e2dc711db29cddae3ecfb1 Binary files /dev/null and b/assets/example_mv_images/14/front.png differ diff --git a/assets/example_mv_images/14/left.png b/assets/example_mv_images/14/left.png new file mode 100644 index 0000000000000000000000000000000000000000..9842b3afce8da368e765a23b3c331d5470651d34 Binary files /dev/null and b/assets/example_mv_images/14/left.png differ diff --git a/assets/example_mv_images/2/back.png b/assets/example_mv_images/2/back.png new file mode 100644 index 0000000000000000000000000000000000000000..88a05130fd9e66a8d47aba17daab335349fb71ec Binary files /dev/null and b/assets/example_mv_images/2/back.png differ diff --git a/assets/example_mv_images/2/front.png b/assets/example_mv_images/2/front.png new file mode 100644 index 0000000000000000000000000000000000000000..35c55ba662a6b7d56ece98db4e5d77ff0ef696f6 Binary files /dev/null and b/assets/example_mv_images/2/front.png differ diff --git a/assets/example_mv_images/2/left.png b/assets/example_mv_images/2/left.png new file mode 100644 index 0000000000000000000000000000000000000000..bd47d6666218a69b8ce03510f7898a07665cbbf8 Binary files /dev/null and b/assets/example_mv_images/2/left.png differ diff --git a/assets/example_mv_images/3/back.png b/assets/example_mv_images/3/back.png new file mode 100644 index 0000000000000000000000000000000000000000..98185fe597c945acadc91c96b0d7a4da84f1eaee Binary files /dev/null and b/assets/example_mv_images/3/back.png differ diff --git 
a/assets/example_mv_images/3/front.png b/assets/example_mv_images/3/front.png new file mode 100644 index 0000000000000000000000000000000000000000..1265af6287dc35c2932a5b32d7a88b9bfda89bf6 Binary files /dev/null and b/assets/example_mv_images/3/front.png differ diff --git a/assets/example_mv_images/3/left.png b/assets/example_mv_images/3/left.png new file mode 100644 index 0000000000000000000000000000000000000000..df83c19e2c61b09baaa0fdeb02c187d1292c1eef Binary files /dev/null and b/assets/example_mv_images/3/left.png differ diff --git a/assets/example_mv_images/4/back.png b/assets/example_mv_images/4/back.png new file mode 100644 index 0000000000000000000000000000000000000000..c818617090ab64610b93f3f5d0ee932f7b42be52 Binary files /dev/null and b/assets/example_mv_images/4/back.png differ diff --git a/assets/example_mv_images/4/front.png b/assets/example_mv_images/4/front.png new file mode 100644 index 0000000000000000000000000000000000000000..8758fd6e8993335f7ca8989c6016fa2a5d320389 Binary files /dev/null and b/assets/example_mv_images/4/front.png differ diff --git a/assets/example_mv_images/4/left.png b/assets/example_mv_images/4/left.png new file mode 100644 index 0000000000000000000000000000000000000000..584be7f0d23090c6fcf24637d33dea73c7504b20 Binary files /dev/null and b/assets/example_mv_images/4/left.png differ diff --git a/assets/example_mv_images/5/back.png b/assets/example_mv_images/5/back.png new file mode 100644 index 0000000000000000000000000000000000000000..71e53e10727609bb7c10f1af5534c8f0b2d9f672 Binary files /dev/null and b/assets/example_mv_images/5/back.png differ diff --git a/assets/example_mv_images/5/front.png b/assets/example_mv_images/5/front.png new file mode 100644 index 0000000000000000000000000000000000000000..041f4ac1aa2398d6e2645582d15ffe9d8351a845 Binary files /dev/null and b/assets/example_mv_images/5/front.png differ diff --git a/assets/example_mv_images/5/left.png b/assets/example_mv_images/5/left.png new file mode 100644 index 0000000000000000000000000000000000000000..2337b260261634d7d6deab2e4b787decc2461758 Binary files /dev/null and b/assets/example_mv_images/5/left.png differ diff --git a/assets/example_mv_images/6/back.png b/assets/example_mv_images/6/back.png new file mode 100644 index 0000000000000000000000000000000000000000..6ceb5d8ab294d234c2f4a7861e012653c57e4a8d Binary files /dev/null and b/assets/example_mv_images/6/back.png differ diff --git a/assets/example_mv_images/6/front.png b/assets/example_mv_images/6/front.png new file mode 100644 index 0000000000000000000000000000000000000000..95fc2c0187cadd47b18a8b005e9941ed4c5dee0e Binary files /dev/null and b/assets/example_mv_images/6/front.png differ diff --git a/assets/example_mv_images/6/left.png b/assets/example_mv_images/6/left.png new file mode 100644 index 0000000000000000000000000000000000000000..944a731239630512747b5bcef82091bd04336b69 Binary files /dev/null and b/assets/example_mv_images/6/left.png differ diff --git a/assets/example_mv_images/7/back.png b/assets/example_mv_images/7/back.png new file mode 100644 index 0000000000000000000000000000000000000000..5ef772d7a9f4d54844c4ef693b034eab5376fdf1 Binary files /dev/null and b/assets/example_mv_images/7/back.png differ diff --git a/assets/example_mv_images/7/front.png b/assets/example_mv_images/7/front.png new file mode 100644 index 0000000000000000000000000000000000000000..01b20d8e5fc9d5d79b85d663313c3e39f524eb68 Binary files /dev/null and b/assets/example_mv_images/7/front.png differ diff --git a/assets/example_mv_images/7/left.png 
b/assets/example_mv_images/7/left.png new file mode 100644 index 0000000000000000000000000000000000000000..bfa778a0bc7de8356d1223e1f22409f5ea6cd2ca Binary files /dev/null and b/assets/example_mv_images/7/left.png differ diff --git a/assets/example_mv_images/8/back.png b/assets/example_mv_images/8/back.png new file mode 100644 index 0000000000000000000000000000000000000000..d1d6b9d0bfba93d838d41965faf5307e97926501 Binary files /dev/null and b/assets/example_mv_images/8/back.png differ diff --git a/assets/example_mv_images/8/front.png b/assets/example_mv_images/8/front.png new file mode 100644 index 0000000000000000000000000000000000000000..9e3c6d8d56b881e110a3a8fadb6de667468622d1 Binary files /dev/null and b/assets/example_mv_images/8/front.png differ diff --git a/assets/example_mv_images/8/left.png b/assets/example_mv_images/8/left.png new file mode 100644 index 0000000000000000000000000000000000000000..2aeb68a0f237da6b9e456f62ad35a38b06eb7603 Binary files /dev/null and b/assets/example_mv_images/8/left.png differ diff --git a/assets/modelviewer-template.html b/assets/modelviewer-template.html index 5a81985e2ccc115efdb2848523da731945bb0957..0c75c6be82a0f85c5bb54a4402f1402a20f5a51e 100644 --- a/assets/modelviewer-template.html +++ b/assets/modelviewer-template.html @@ -3,22 +3,41 @@ - - + @@ -43,8 +62,20 @@
- +
+ + +
- + \ No newline at end of file diff --git a/assets/modelviewer-textured-template.html b/assets/modelviewer-textured-template.html index c873e01dcc5d69f6a65f875362403c60c4874617..ae6558184d9c27273e135a7e2f240415e5f2b953 100644 --- a/assets/modelviewer-textured-template.html +++ b/assets/modelviewer-textured-template.html @@ -3,8 +3,7 @@ - - +
- +
+
+ + +
+ +
+
+ Appearance +
+
Geometry
+
+
- - + \ No newline at end of file diff --git a/gradio_app.py b/gradio_app.py old mode 100644 new mode 100755 index 2ec55ded69db8b8659b548ff6e037b03d2745d4f..526569c5eed3a9a88cebe8a78808cd99636baba4 --- a/gradio_app.py +++ b/gradio_app.py @@ -1,4 +1,19 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + import os +import random import shutil import time from glob import glob @@ -6,47 +21,98 @@ from pathlib import Path import gradio as gr import torch +import trimesh import uvicorn from fastapi import FastAPI from fastapi.staticfiles import StaticFiles +import uuid + +from hy3dgen.shapegen.utils import logger + +MAX_SEED = 1e7 + +if True: + import os + import spaces + import subprocess + import sys + import shlex + print("cd /home/user/app/hy3dgen/texgen/differentiable_renderer/ && bash compile_mesh_painter.sh") + os.system("cd /home/user/app/hy3dgen/texgen/differentiable_renderer/ && bash compile_mesh_painter.sh") + print('install custom') + subprocess.run(shlex.split("pip install custom_rasterizer-0.1-cp310-cp310-linux_x86_64.whl"), check=True) def get_example_img_list(): print('Loading example img list ...') - return sorted(glob('./assets/example_images/*.png')) + return sorted(glob('./assets/example_images/**/*.png', recursive=True)) def get_example_txt_list(): print('Loading example txt list ...') txt_list = list() - for line in open('./assets/example_prompts.txt'): + for line in open('./assets/example_prompts.txt', encoding='utf-8'): txt_list.append(line.strip()) return txt_list -def gen_save_folder(max_size=60): +def get_example_mv_list(): + print('Loading example mv list ...') + mv_list = list() + root = './assets/example_mv_images' + for mv_dir in os.listdir(root): + view_list = [] + for view in ['front', 'back', 'left', 'right']: + path = os.path.join(root, mv_dir, f'{view}.png') + if os.path.exists(path): + view_list.append(path) + else: + view_list.append(None) + mv_list.append(view_list) + return mv_list + + +def gen_save_folder(max_size=200): os.makedirs(SAVE_DIR, exist_ok=True) - exists = set(int(_) for _ in os.listdir(SAVE_DIR) if not _.startswith(".")) - cur_id = min(set(range(max_size)) - exists) if len(exists) < max_size else -1 - if os.path.exists(f"{SAVE_DIR}/{(cur_id + 1) % max_size}"): - shutil.rmtree(f"{SAVE_DIR}/{(cur_id + 1) % max_size}") - print(f"remove {SAVE_DIR}/{(cur_id + 1) % max_size} success !!!") - save_folder = f"{SAVE_DIR}/{max(0, cur_id)}" - os.makedirs(save_folder, exist_ok=True) - print(f"mkdir {save_folder} suceess !!!") - return save_folder - - -def export_mesh(mesh, save_folder, textured=False): + + # Collect all existing folder paths + dirs = [f for f in Path(SAVE_DIR).iterdir() if f.is_dir()] + + # If the number of folders exceeds max_size, remove the one with the oldest creation time
+ if len(dirs) >= max_size: + # Sort by creation time; the oldest comes first + oldest_dir = min(dirs, key=lambda x: x.stat().st_ctime) + shutil.rmtree(oldest_dir) + print(f"Removed the oldest folder: {oldest_dir}") + + # Generate a new folder named with a uuid + new_folder = os.path.join(SAVE_DIR, str(uuid.uuid4())) + os.makedirs(new_folder, exist_ok=True) + print(f"Created new folder: {new_folder}") + + return new_folder + + +def export_mesh(mesh, save_folder, textured=False, type='glb'): if textured: - path = os.path.join(save_folder, f'textured_mesh.glb') + path = os.path.join(save_folder, f'textured_mesh.{type}') else: - path = os.path.join(save_folder, f'white_mesh.glb') - mesh.export(path, include_normals=textured) + path = os.path.join(save_folder, f'white_mesh.{type}') + if type not in ['glb', 'obj']: + mesh.export(path) + else: + mesh.export(path, include_normals=textured) return path +def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: + if randomize_seed: + seed = random.randint(0, MAX_SEED) + return seed + + def build_model_viewer_html(save_folder, height=660, width=790, textured=False): + # Remove first folder from path to make relative path if textured: related_path = f"./textured_mesh.glb" template_name = './assets/modelviewer-textured-template.html' output_html_path = os.path.join(save_folder, f'textured_mesh.html') @@ -55,25 +121,20 @@ def build_model_viewer_html(save_folder, height=660, width=790, textured=False): related_path = f"./white_mesh.glb" template_name = './assets/modelviewer-template.html' output_html_path = os.path.join(save_folder, f'white_mesh.html') - - with open(os.path.join(CURRENT_DIR, template_name), 'r') as f: + offset = 50 if textured else 10 + with open(os.path.join(CURRENT_DIR, template_name), 'r', encoding='utf-8') as f: template_html = f.read() - obj_html = f""" -
- - -
- """ - with open(output_html_path, 'w') as f: - f.write(template_html.replace('', obj_html)) + with open(output_html_path, 'w', encoding='utf-8') as f: + template_html = template_html.replace('#height#', f'{height - offset}') + template_html = template_html.replace('#width#', f'{width}') + template_html = template_html.replace('#src#', f'{related_path}/') + f.write(template_html) - output_html_path = output_html_path.replace(SAVE_DIR + '/', '') - iframe_tag = f'' - print(f'Find html {output_html_path}, {os.path.exists(output_html_path)}') + rel_path = os.path.relpath(output_html_path, SAVE_DIR) + iframe_tag = f'' + print( + f'Find html file {output_html_path}, {os.path.exists(output_html_path)}, relative HTML path is /static/{rel_path}') return f"""
@@ -81,21 +142,58 @@ def build_model_viewer_html(save_folder, height=660, width=790, textured=False):
""" - +@spaces.GPU(duration=40) def _gen_shape( - caption, - image, + caption=None, + image=None, + mv_image_front=None, + mv_image_back=None, + mv_image_left=None, + mv_image_right=None, steps=50, guidance_scale=7.5, seed=1234, octree_resolution=256, check_box_rembg=False, + num_chunks=200000, + randomize_seed: bool = False, ): + if not MV_MODE and image is None and caption is None: + raise gr.Error("Please provide either a caption or an image.") + if MV_MODE: + if mv_image_front is None and mv_image_back is None and mv_image_left is None and mv_image_right is None: + raise gr.Error("Please provide at least one view image.") + image = {} + if mv_image_front: + image['front'] = mv_image_front + if mv_image_back: + image['back'] = mv_image_back + if mv_image_left: + image['left'] = mv_image_left + if mv_image_right: + image['right'] = mv_image_right + + seed = int(randomize_seed_fn(seed, randomize_seed)) + + octree_resolution = int(octree_resolution) if caption: print('prompt is', caption) save_folder = gen_save_folder() - stats = {} + stats = { + 'model': { + 'shapegen': f'{args.model_path}/{args.subfolder}', + 'texgen': f'{args.texgen_model_path}', + }, + 'params': { + 'caption': caption, + 'steps': steps, + 'guidance_scale': guidance_scale, + 'seed': seed, + 'octree_resolution': octree_resolution, + 'check_box_rembg': check_box_rembg, + 'num_chunks': num_chunks, + } + } time_meta = {} - start_time_0 = time.time() if image is None: start_time = time.time() @@ -105,204 +203,361 @@ def _gen_shape( raise gr.Error(f"Text to 3D is disable. Please enable it by `python gradio_app.py --enable_t23d`.") time_meta['text2image'] = time.time() - start_time - image.save(os.path.join(save_folder, 'input.png')) - - print(image.mode) - if check_box_rembg or image.mode == "RGB": + # remove disk io to make responding faster, uncomment at your will. + # image.save(os.path.join(save_folder, 'input.png')) + if MV_MODE: start_time = time.time() - image = rmbg_worker(image.convert('RGB')) - time_meta['rembg'] = time.time() - start_time + for k, v in image.items(): + if check_box_rembg or v.mode == "RGB": + img = rmbg_worker(v.convert('RGB')) + image[k] = img + time_meta['remove background'] = time.time() - start_time + else: + if check_box_rembg or image.mode == "RGB": + start_time = time.time() + image = rmbg_worker(image.convert('RGB')) + time_meta['remove background'] = time.time() - start_time - image.save(os.path.join(save_folder, 'rembg.png')) + # remove disk io to make responding faster, uncomment at your will. 
+ # image.save(os.path.join(save_folder, 'rembg.png')) # image to white model start_time = time.time() generator = torch.Generator() generator = generator.manual_seed(int(seed)) - mesh = i23d_worker( + outputs = i23d_worker( image=image, num_inference_steps=steps, guidance_scale=guidance_scale, generator=generator, - octree_resolution=octree_resolution - )[0] + octree_resolution=octree_resolution, + num_chunks=num_chunks, + output_type='mesh' + ) + time_meta['shape generation'] = time.time() - start_time + logger.info("---Shape generation takes %s seconds ---" % (time.time() - start_time)) - mesh = FloaterRemover()(mesh) - mesh = DegenerateFaceRemover()(mesh) - mesh = FaceReducer()(mesh) + tmp_start = time.time() + mesh = export_to_trimesh(outputs)[0] + time_meta['export to trimesh'] = time.time() - tmp_start stats['number_of_faces'] = mesh.faces.shape[0] stats['number_of_vertices'] = mesh.vertices.shape[0] - time_meta['image_to_textured_3d'] = {'total': time.time() - start_time} - time_meta['total'] = time.time() - start_time_0 stats['time'] = time_meta - return mesh, image, save_folder - + main_image = image if not MV_MODE else image['front'] + return mesh, main_image, save_folder, stats, seed +@spaces.GPU(duration=90) def generation_all( - caption, - image, + caption=None, + image=None, + mv_image_front=None, + mv_image_back=None, + mv_image_left=None, + mv_image_right=None, steps=50, guidance_scale=7.5, seed=1234, octree_resolution=256, - check_box_rembg=False + check_box_rembg=False, + num_chunks=200000, + randomize_seed: bool = False, ): - mesh, image, save_folder = _gen_shape( + start_time_0 = time.time() + mesh, image, save_folder, stats, seed = _gen_shape( caption, image, + mv_image_front=mv_image_front, + mv_image_back=mv_image_back, + mv_image_left=mv_image_left, + mv_image_right=mv_image_right, steps=steps, guidance_scale=guidance_scale, seed=seed, octree_resolution=octree_resolution, - check_box_rembg=check_box_rembg + check_box_rembg=check_box_rembg, + num_chunks=num_chunks, + randomize_seed=randomize_seed, ) path = export_mesh(mesh, save_folder, textured=False) - model_viewer_html = build_model_viewer_html(save_folder, height=596, width=700) + # tmp_time = time.time() + # mesh = floater_remove_worker(mesh) + # mesh = degenerate_face_remove_worker(mesh) + # logger.info("---Postprocessing takes %s seconds ---" % (time.time() - tmp_time)) + # stats['time']['postprocessing'] = time.time() - tmp_time + + tmp_time = time.time() + mesh = face_reduce_worker(mesh) + logger.info("---Face Reduction takes %s seconds ---" % (time.time() - tmp_time)) + stats['time']['face reduction'] = time.time() - tmp_time + + tmp_time = time.time() textured_mesh = texgen_worker(mesh, image) - path_textured = export_mesh(textured_mesh, save_folder, textured=True) - model_viewer_html_textured = build_model_viewer_html(save_folder, height=596, width=700, textured=True) + logger.info("---Texture Generation takes %s seconds ---" % (time.time() - tmp_time)) + stats['time']['texture generation'] = time.time() - tmp_time + stats['time']['total'] = time.time() - start_time_0 + textured_mesh.metadata['extras'] = stats + path_textured = export_mesh(textured_mesh, save_folder, textured=True) + model_viewer_html_textured = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH, + textured=True) + if args.low_vram_mode: + torch.cuda.empty_cache() return ( - gr.update(value=path, visible=True), - gr.update(value=path_textured, visible=True), - model_viewer_html, + gr.update(value=path), + 
gr.update(value=path_textured), model_viewer_html_textured, + stats, + seed, ) - +@spaces.GPU(duration=40) def shape_generation( - caption, - image, + caption=None, + image=None, + mv_image_front=None, + mv_image_back=None, + mv_image_left=None, + mv_image_right=None, steps=50, guidance_scale=7.5, seed=1234, octree_resolution=256, check_box_rembg=False, + num_chunks=200000, + randomize_seed: bool = False, ): - mesh, image, save_folder = _gen_shape( + start_time_0 = time.time() + mesh, image, save_folder, stats, seed = _gen_shape( caption, image, + mv_image_front=mv_image_front, + mv_image_back=mv_image_back, + mv_image_left=mv_image_left, + mv_image_right=mv_image_right, steps=steps, guidance_scale=guidance_scale, seed=seed, octree_resolution=octree_resolution, - check_box_rembg=check_box_rembg + check_box_rembg=check_box_rembg, + num_chunks=num_chunks, + randomize_seed=randomize_seed, ) + stats['time']['total'] = time.time() - start_time_0 + mesh.metadata['extras'] = stats path = export_mesh(mesh, save_folder, textured=False) - model_viewer_html = build_model_viewer_html(save_folder, height=596, width=700) - + model_viewer_html = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH) + if args.low_vram_mode: + torch.cuda.empty_cache() return ( - gr.update(value=path, visible=True), + gr.update(value=path), model_viewer_html, + stats, + seed, ) def build_app(): - title_html = """ + title = 'Hunyuan3D-2: High Resolution Textured 3D Assets Generation' + if MV_MODE: + title = 'Hunyuan3D-2mv: Image to 3D Generation with 1-4 Views' + if 'mini' in args.subfolder: + title = 'Hunyuan3D-2mini: Strong 0.6B Image to Shape Generator' + if TURBO_MODE: + title = title.replace(':', '-Turbo: Fast ') + + title_html = f"""
- Hunyuan3D-2: Scaling Diffusion Models for High Resolution Textured 3D Assets Generation + {title}
Tencent Hunyuan3D Team
- Github Page   + Github   Homepage   + Hunyuan3D Studio   Technical Report   - Models   + Pretrained Models
+ """ + custom_css = """ + .app.svelte-wpkpf6.svelte-wpkpf6:not(.fill_width) { + max-width: 1480px; + } + .mv-image button .wrap { + font-size: 10px; + } + + .mv-image .icon-wrap { + width: 20px; + } + """ - with gr.Blocks(theme=gr.themes.Base(), title='Hunyuan-3D-2.0') as demo: + with gr.Blocks(theme=gr.themes.Base(), title='Hunyuan-3D-2.0', analytics_enabled=False, css=custom_css) as demo: gr.HTML(title_html) with gr.Row(): - with gr.Column(scale=2): - with gr.Tabs() as tabs_prompt: - with gr.Tab('Image Prompt', id='tab_img_prompt') as tab_ip: + with gr.Column(scale=3): + with gr.Tabs(selected='tab_img_prompt') as tabs_prompt: + with gr.Tab('Image Prompt', id='tab_img_prompt', visible=not MV_MODE) as tab_ip: image = gr.Image(label='Image', type='pil', image_mode='RGBA', height=290) - with gr.Row(): - check_box_rembg = gr.Checkbox(value=True, label='Remove Background') - with gr.Tab('Text Prompt', id='tab_txt_prompt', visible=HAS_T2I) as tab_tp: + with gr.Tab('Text Prompt', id='tab_txt_prompt', visible=HAS_T2I and not MV_MODE) as tab_tp: caption = gr.Textbox(label='Text Prompt', placeholder='HunyuanDiT will be used to generate image.', info='Example: A 3D model of a cute cat, white background') - - with gr.Accordion('Advanced Options', open=False): - num_steps = gr.Slider(maximum=50, minimum=20, value=30, step=1, label='Inference Steps') - octree_resolution = gr.Dropdown([256, 384, 512], value=256, label='Octree Resolution') - cfg_scale = gr.Number(value=5.5, label='Guidance Scale') - seed = gr.Slider(maximum=1e7, minimum=0, value=1234, label='Seed') - - with gr.Group(): - btn = gr.Button(value='Generate Shape Only', variant='primary') - btn_all = gr.Button(value='Generate Shape and Texture', variant='primary', visible=HAS_TEXTUREGEN) + with gr.Tab('MultiView Prompt', visible=MV_MODE) as tab_mv: + # gr.Label('Please upload at least one front image.') + with gr.Row(): + mv_image_front = gr.Image(label='Front', type='pil', image_mode='RGBA', height=140, + min_width=100, elem_classes='mv-image') + mv_image_back = gr.Image(label='Back', type='pil', image_mode='RGBA', height=140, + min_width=100, elem_classes='mv-image') + with gr.Row(): + mv_image_left = gr.Image(label='Left', type='pil', image_mode='RGBA', height=140, + min_width=100, elem_classes='mv-image') + mv_image_right = gr.Image(label='Right', type='pil', image_mode='RGBA', height=140, + min_width=100, elem_classes='mv-image') + + with gr.Row(): + btn = gr.Button(value='Gen Shape', variant='primary', min_width=100) + btn_all = gr.Button(value='Gen Textured Shape', + variant='primary', + visible=HAS_TEXTUREGEN, + min_width=100) with gr.Group(): file_out = gr.File(label="File", visible=False) file_out2 = gr.File(label="File", visible=False) - with gr.Column(scale=5): - with gr.Tabs(): - with gr.Tab('Generated Mesh') as mesh1: - html_output1 = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output') - with gr.Tab('Generated Textured Mesh') as mesh2: - html_output2 = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output') - - with gr.Column(scale=2): - with gr.Tabs() as gallery: - with gr.Tab('Image to 3D Gallery', id='tab_img_gallery') as tab_gi: + with gr.Tabs(selected='tab_options' if TURBO_MODE else 'tab_export'): + with gr.Tab("Options", id='tab_options', visible=TURBO_MODE): + gen_mode = gr.Radio(label='Generation Mode', + info='Recommendation: Turbo for most cases, Fast for very complex cases, Standard seldom use.', + choices=['Turbo', 'Fast', 'Standard'], value='Turbo') + decode_mode = gr.Radio(label='Decoding Mode', + info='The resolution for 
exporting mesh from generated vectset', + choices=['Low', 'Standard', 'High'], + value='Standard') + with gr.Tab('Advanced Options', id='tab_advanced_options'): + with gr.Row(): + check_box_rembg = gr.Checkbox(value=True, label='Remove Background', min_width=100) + randomize_seed = gr.Checkbox(label="Randomize seed", value=True, min_width=100) + seed = gr.Slider( + label="Seed", + minimum=0, + maximum=MAX_SEED, + step=1, + value=1234, + min_width=100, + ) + with gr.Row(): + num_steps = gr.Slider(maximum=100, + minimum=1, + value=5 if 'turbo' in args.subfolder else 30, + step=1, label='Inference Steps') + octree_resolution = gr.Slider(maximum=512, minimum=16, value=256, label='Octree Resolution') + with gr.Row(): + cfg_scale = gr.Number(value=5.0, label='Guidance Scale', min_width=100) + num_chunks = gr.Slider(maximum=5000000, minimum=1000, value=8000, + label='Number of Chunks', min_width=100) + with gr.Tab("Export", id='tab_export'): + with gr.Row(): + file_type = gr.Dropdown(label='File Type', choices=SUPPORTED_FORMATS, + value='glb', min_width=100) + reduce_face = gr.Checkbox(label='Simplify Mesh', value=False, min_width=100) + export_texture = gr.Checkbox(label='Include Texture', value=False, + visible=False, min_width=100) + target_face_num = gr.Slider(maximum=1000000, minimum=100, value=10000, + label='Target Face Number') + with gr.Row(): + confirm_export = gr.Button(value="Transform", min_width=100) + file_export = gr.DownloadButton(label="Download", variant='primary', + interactive=False, min_width=100) + + with gr.Column(scale=6): + with gr.Tabs(selected='gen_mesh_panel') as tabs_output: + with gr.Tab('Generated Mesh', id='gen_mesh_panel'): + html_gen_mesh = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output') + with gr.Tab('Exporting Mesh', id='export_mesh_panel'): + html_export_mesh = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output') + with gr.Tab('Mesh Statistic', id='stats_panel'): + stats = gr.Json({}, label='Mesh Stats') + + with gr.Column(scale=3 if MV_MODE else 2): + with gr.Tabs(selected='tab_img_gallery') as gallery: + with gr.Tab('Image to 3D Gallery', id='tab_img_gallery', visible=not MV_MODE) as tab_gi: with gr.Row(): gr.Examples(examples=example_is, inputs=[image], - label="Image Prompts", examples_per_page=18) + label=None, examples_per_page=18) - with gr.Tab('Text to 3D Gallery', id='tab_txt_gallery', visible=HAS_T2I) as tab_gt: + with gr.Tab('Text to 3D Gallery', id='tab_txt_gallery', visible=HAS_T2I and not MV_MODE) as tab_gt: with gr.Row(): gr.Examples(examples=example_ts, inputs=[caption], - label="Text Prompts", examples_per_page=18) + label=None, examples_per_page=18) + with gr.Tab('MultiView to 3D Gallery', id='tab_mv_gallery', visible=MV_MODE) as tab_mv: + with gr.Row(): + gr.Examples(examples=example_mvs, + inputs=[mv_image_front, mv_image_back, mv_image_left, mv_image_right], + label=None, examples_per_page=6) + gr.HTML(f""" +
+ Activated Model - Shape Generation ({args.model_path}/{args.subfolder}) ; Texture Generation ({'Hunyuan3D-2' if HAS_TEXTUREGEN else 'Unavailable'}) +
+ """) if not HAS_TEXTUREGEN: - gr.HTML(""") -
+ gr.HTML(""" +
Warning: Texture synthesis is disabled due to missing requirements, - please install requirements following README.md to activate it. + please install requirements following README.md to activate it.
""") if not args.enable_t23d: gr.HTML(""" -
+
Warning: Text to 3D is disabled. To activate it, please run `python gradio_app.py --enable_t23d`.
""") - tab_gi.select(fn=lambda: gr.update(selected='tab_img_prompt'), outputs=tabs_prompt) + tab_ip.select(fn=lambda: gr.update(selected='tab_img_gallery'), outputs=gallery) if HAS_T2I: - tab_gt.select(fn=lambda: gr.update(selected='tab_txt_prompt'), outputs=tabs_prompt) + tab_tp.select(fn=lambda: gr.update(selected='tab_txt_gallery'), outputs=gallery) btn.click( shape_generation, inputs=[ caption, image, + mv_image_front, + mv_image_back, + mv_image_left, + mv_image_right, num_steps, cfg_scale, seed, octree_resolution, check_box_rembg, + num_chunks, + randomize_seed, ], - outputs=[file_out, html_output1] + outputs=[file_out, html_gen_mesh, stats, seed] ).then( - lambda: gr.update(visible=True), - outputs=[file_out], + lambda: (gr.update(visible=False, value=False), gr.update(interactive=True), gr.update(interactive=True), + gr.update(interactive=False)), + outputs=[export_texture, reduce_face, confirm_export, file_export], + ).then( + lambda: gr.update(selected='gen_mesh_panel'), + outputs=[tabs_output], ) btn_all.click( @@ -310,16 +565,88 @@ def build_app(): inputs=[ caption, image, + mv_image_front, + mv_image_back, + mv_image_left, + mv_image_right, num_steps, cfg_scale, seed, octree_resolution, check_box_rembg, + num_chunks, + randomize_seed, ], - outputs=[file_out, file_out2, html_output1, html_output2] + outputs=[file_out, file_out2, html_gen_mesh, stats, seed] + ).then( + lambda: (gr.update(visible=True, value=True), gr.update(interactive=False), gr.update(interactive=True), + gr.update(interactive=False)), + outputs=[export_texture, reduce_face, confirm_export, file_export], ).then( - lambda: (gr.update(visible=True), gr.update(visible=True)), - outputs=[file_out, file_out2], + lambda: gr.update(selected='gen_mesh_panel'), + outputs=[tabs_output], + ) + + def on_gen_mode_change(value): + if value == 'Turbo': + return gr.update(value=5) + elif value == 'Fast': + return gr.update(value=10) + else: + return gr.update(value=30) + + gen_mode.change(on_gen_mode_change, inputs=[gen_mode], outputs=[num_steps]) + + def on_decode_mode_change(value): + if value == 'Low': + return gr.update(value=196) + elif value == 'Standard': + return gr.update(value=256) + else: + return gr.update(value=384) + + decode_mode.change(on_decode_mode_change, inputs=[decode_mode], outputs=[octree_resolution]) + + def on_export_click(file_out, file_out2, file_type, reduce_face, export_texture, target_face_num): + if file_out is None: + raise gr.Error('Please generate a mesh first.') + + print(f'exporting {file_out}') + print(f'reduce face to {target_face_num}') + if export_texture: + mesh = trimesh.load(file_out2) + save_folder = gen_save_folder() + path = export_mesh(mesh, save_folder, textured=True, type=file_type) + + # for preview + save_folder = gen_save_folder() + _ = export_mesh(mesh, save_folder, textured=True) + model_viewer_html = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH, + textured=True) + else: + mesh = trimesh.load(file_out) + mesh = floater_remove_worker(mesh) + mesh = degenerate_face_remove_worker(mesh) + if reduce_face: + mesh = face_reduce_worker(mesh, target_face_num) + save_folder = gen_save_folder() + path = export_mesh(mesh, save_folder, textured=False, type=file_type) + + # for preview + save_folder = gen_save_folder() + _ = export_mesh(mesh, save_folder, textured=False) + model_viewer_html = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH, + textured=False) + print(f'export to {path}') + return model_viewer_html, 
gr.update(value=path, interactive=True) + + confirm_export.click( + lambda: gr.update(selected='export_mesh_panel'), + outputs=[tabs_output], + ).then( + on_export_click, + inputs=[file_out, file_out2, file_type, reduce_face, export_texture, target_face_num], + outputs=[html_export_mesh, file_export] ) return demo @@ -329,18 +656,40 @@ if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() - parser.add_argument('--port', type=int, default=8080) + parser.add_argument("--model_path", type=str, default='tencent/Hunyuan3D-2') + parser.add_argument("--subfolder", type=str, default='hunyuan3d-dit-v2-0') + parser.add_argument("--texgen_model_path", type=str, default='tencent/Hunyuan3D-2') + parser.add_argument('--port', type=int, default=7860) + parser.add_argument('--host', type=str, default='0.0.0.0') + parser.add_argument('--device', type=str, default='cuda') + parser.add_argument('--mc_algo', type=str, default='mc') parser.add_argument('--cache-path', type=str, default='gradio_cache') parser.add_argument('--enable_t23d', action='store_true') + parser.add_argument('--disable_tex', action='store_true') + parser.add_argument('--enable_flashvdm', action='store_true') + parser.add_argument('--compile', action='store_true') + parser.add_argument('--low_vram_mode', action='store_true') args = parser.parse_args() + args.enable_flashvdm = True + args.enable_t23d = False + SAVE_DIR = args.cache_path os.makedirs(SAVE_DIR, exist_ok=True) CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) - - HTML_OUTPUT_PLACEHOLDER = """ -
+ MV_MODE = 'mv' in args.model_path + TURBO_MODE = 'turbo' in args.subfolder + + HTML_HEIGHT = 690 if MV_MODE else 650 + HTML_WIDTH = 500 + HTML_OUTPUT_PLACEHOLDER = f""" +
+
+

Welcome to Hunyuan3D!

+

No mesh here.

+
+
""" INPUT_MESH_HTML = """ @@ -350,31 +699,56 @@ if __name__ == '__main__': """ example_is = get_example_img_list() example_ts = get_example_txt_list() + example_mvs = get_example_mv_list() - try: - from hy3dgen.texgen import Hunyuan3DPaintPipeline + SUPPORTED_FORMATS = ['glb', 'obj', 'ply', 'stl'] - texgen_worker = Hunyuan3DPaintPipeline.from_pretrained('tencent/Hunyuan3D-2') - HAS_TEXTUREGEN = True - except Exception as e: - print(e) - print("Failed to load texture generator.") - print('Please try to install requirements by following README.md') - HAS_TEXTUREGEN = False + HAS_TEXTUREGEN = False + if not args.disable_tex: + try: + from hy3dgen.texgen import Hunyuan3DPaintPipeline + + texgen_worker = Hunyuan3DPaintPipeline.from_pretrained(args.texgen_model_path) + if args.low_vram_mode: + texgen_worker.enable_model_cpu_offload() + # Not help much, ignore for now. + # if args.compile: + # texgen_worker.models['delight_model'].pipeline.unet.compile() + # texgen_worker.models['delight_model'].pipeline.vae.compile() + # texgen_worker.models['multiview_model'].pipeline.unet.compile() + # texgen_worker.models['multiview_model'].pipeline.vae.compile() + HAS_TEXTUREGEN = True + except Exception as e: + print(e) + print("Failed to load texture generator.") + print('Please try to install requirements by following README.md') + HAS_TEXTUREGEN = False - HAS_T2I = False + HAS_T2I = True if args.enable_t23d: from hy3dgen.text2image import HunyuanDiTPipeline - t2i_worker = HunyuanDiTPipeline('Tencent-Hunyuan--HunyuanDiT-v1.1-Diffusers-Distilled') + t2i_worker = HunyuanDiTPipeline('Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled') HAS_T2I = True - from hy3dgen.shapegen import FaceReducer, FloaterRemover, DegenerateFaceRemover, \ + from hy3dgen.shapegen import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier, \ Hunyuan3DDiTFlowMatchingPipeline + from hy3dgen.shapegen.pipelines import export_to_trimesh from hy3dgen.rembg import BackgroundRemover rmbg_worker = BackgroundRemover() - i23d_worker = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained('tencent/Hunyuan3D-2') + i23d_worker = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained( + args.model_path, + subfolder=args.subfolder, + use_safetensors=True, + device=args.device, + ) + if args.enable_flashvdm: + mc_algo = 'mc' if args.device in ['cpu', 'mps'] else args.mc_algo + i23d_worker.enable_flashvdm(mc_algo=mc_algo) + if args.compile: + i23d_worker.compile() + floater_remove_worker = FloaterRemover() degenerate_face_remove_worker = DegenerateFaceRemover() face_reduce_worker = FaceReducer() @@ -383,10 +757,13 @@ if __name__ == '__main__': # create a FastAPI app app = FastAPI() # create a static directory to store the static files - static_dir = Path('./gradio_cache') + static_dir = Path(SAVE_DIR).absolute() static_dir.mkdir(parents=True, exist_ok=True) - app.mount("/static", StaticFiles(directory=static_dir), name="static") + app.mount("/static", StaticFiles(directory=static_dir, html=True), name="static") + shutil.copytree('./assets/env_maps', os.path.join(static_dir, 'env_maps'), dirs_exist_ok=True) + if args.low_vram_mode: + torch.cuda.empty_cache() demo = build_app() app = gr.mount_gradio_app(app, demo, path="/") - uvicorn.run(app, host="0.0.0.0", port=args.port) + uvicorn.run(app, host=args.host, port=args.port) diff --git a/hy3dgen/__init__.py b/hy3dgen/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644 --- a/hy3dgen/__init__.py +++ b/hy3dgen/__init__.py @@ -1,13 +1,3 @@ -# 
Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -20,4 +10,4 @@ # their software and algorithms, including trained model weights, parameters (including # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, # fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. \ No newline at end of file diff --git a/hy3dgen/rembg.py b/hy3dgen/rembg.py index c0d99483c8354fc10c6689b5cf12ebcd44368d92..6247f060c9f325b1e267668baf236ec8e4c2dae9 100644 --- a/hy3dgen/rembg.py +++ b/hy3dgen/rembg.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -22,7 +12,6 @@ # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. - from PIL import Image from rembg import remove, new_session diff --git a/hy3dgen/shapegen/__init__.py b/hy3dgen/shapegen/__init__.py old mode 100644 new mode 100755 index d1f9534c15d029511d910d29e45da5ba7b8c8714..1b1f9cce42c1d2bef94bbb2d7b088d24e76fe01e --- a/hy3dgen/shapegen/__init__.py +++ b/hy3dgen/shapegen/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. 
# Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -23,5 +13,5 @@ # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. from .pipelines import Hunyuan3DDiTPipeline, Hunyuan3DDiTFlowMatchingPipeline -from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover +from .postprocessors import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier from .preprocessors import ImageProcessorV2, IMAGE_PROCESSORS, DEFAULT_IMAGEPROCESSOR diff --git a/hy3dgen/shapegen/models/__init__.py b/hy3dgen/shapegen/models/__init__.py old mode 100644 new mode 100755 index 684b3e389737fb988f5e363e777c34f6cd1fe4ea..8179353ba7a5bdb8bcc30baa64e319fb8f884d57 --- a/hy3dgen/shapegen/models/__init__.py +++ b/hy3dgen/shapegen/models/__init__.py @@ -23,6 +23,6 @@ # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +from .autoencoders import ShapeVAE from .conditioner import DualImageEncoder, SingleImageEncoder, DinoImageEncoder, CLIPImageEncoder -from .hunyuan3ddit import Hunyuan3DDiT -from .vae import ShapeVAE +from .denoisers import Hunyuan3DDiT diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py b/hy3dgen/shapegen/models/autoencoders/__init__.py similarity index 61% rename from hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py rename to hy3dgen/shapegen/models/autoencoders/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..20bbf8d9559f8d5de7d7ae2d88bbb0348a197dc4 100644 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/__init__.py +++ b/hy3dgen/shapegen/models/autoencoders/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -21,3 +11,10 @@ # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
+ +from .attention_blocks import CrossAttentionDecoder +from .attention_processors import FlashVDMCrossAttentionProcessor, CrossAttentionProcessor, \ + FlashVDMTopMCrossAttentionProcessor +from .model import ShapeVAE, VectsetVAE +from .surface_extractors import SurfaceExtractors, MCSurfaceExtractor, DMCSurfaceExtractor, Latent2MeshOutput +from .volume_decoders import HierarchicalVolumeDecoding, FlashVDMVolumeDecoding, VanillaVolumeDecoder diff --git a/hy3dgen/shapegen/models/vae.py b/hy3dgen/shapegen/models/autoencoders/attention_blocks.py similarity index 63% rename from hy3dgen/shapegen/models/vae.py rename to hy3dgen/shapegen/models/autoencoders/attention_blocks.py index aef2784ac0db653714e711d12697eafc962c2aa3..ab34eeb4eb8b19eb52e1ff188aaf785ad3fa96cb 100644 --- a/hy3dgen/shapegen/models/vae.py +++ b/hy3dgen/shapegen/models/autoencoders/attention_blocks.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -22,15 +12,25 @@ # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. -from typing import Tuple, List, Union, Optional -import numpy as np +import os +from typing import Optional + import torch import torch.nn as nn -import torch.nn.functional as F -from einops import rearrange, repeat -from skimage import measure -from tqdm import tqdm +from einops import rearrange + +from .attention_processors import CrossAttentionProcessor +from ...utils import logger + +scaled_dot_product_attention = nn.functional.scaled_dot_product_attention + +if os.environ.get('USE_SAGEATTN', '0') == '1': + try: + from sageattention import sageattn + except ImportError: + raise ImportError('Please install the package "sageattention" to use this USE_SAGEATTN.') + scaled_dot_product_attention = sageattn class FourierEmbedder(nn.Module): @@ -166,13 +166,14 @@ class MLP(nn.Module): def __init__( self, *, width: int, + expand_ratio: int = 4, output_width: int = None, drop_path_rate: float = 0.0 ): super().__init__() self.width = width - self.c_fc = nn.Linear(width, width * 4) - self.c_proj = nn.Linear(width * 4, output_width if output_width is not None else width) + self.c_fc = nn.Linear(width, width * expand_ratio) + self.c_proj = nn.Linear(width * expand_ratio, output_width if output_width is not None else width) self.gelu = nn.GELU() self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. 
else nn.Identity() @@ -196,6 +197,8 @@ class QKVMultiheadCrossAttention(nn.Module): self.q_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity() self.k_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity() + self.attn_processor = CrossAttentionProcessor() + def forward(self, q, kv): _, n_ctx, _ = q.shape bs, n_data, width = kv.shape @@ -206,10 +209,9 @@ class QKVMultiheadCrossAttention(nn.Module): q = self.q_norm(q) k = self.k_norm(k) - q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v)) - out = F.scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1) - + out = self.attn_processor(self, q, k, v) + out = out.transpose(1, 2).reshape(bs, n_ctx, -1) return out @@ -223,7 +225,8 @@ class MultiheadCrossAttention(nn.Module): n_data: Optional[int] = None, data_width: Optional[int] = None, norm_layer=nn.LayerNorm, - qk_norm: bool = False + qk_norm: bool = False, + kv_cache: bool = False, ): super().__init__() self.n_data = n_data @@ -240,10 +243,18 @@ class MultiheadCrossAttention(nn.Module): norm_layer=norm_layer, qk_norm=qk_norm ) + self.kv_cache = kv_cache + self.data = None def forward(self, x, data): x = self.c_q(x) - data = self.c_kv(data) + if self.kv_cache: + if self.data is None: + self.data = self.c_kv(data) + logger.info('Saving kv cache; this should be called only once per mesh') + data = self.data + else: + data = self.c_kv(data) x = self.attention(x, data) x = self.c_proj(x) return x @@ -256,6 +267,7 @@ class ResidualCrossAttentionBlock(nn.Module): n_data: Optional[int] = None, width: int, heads: int, + mlp_expand_ratio: int = 4, data_width: Optional[int] = None, qkv_bias: bool = True, norm_layer=nn.LayerNorm, @@ -278,7 +290,7 @@ class ResidualCrossAttentionBlock(nn.Module): self.ln_1 = norm_layer(width, elementwise_affine=True, eps=1e-6) self.ln_2 = norm_layer(data_width, elementwise_affine=True, eps=1e-6) self.ln_3 = norm_layer(width, elementwise_affine=True, eps=1e-6) - self.mlp = MLP(width=width) + self.mlp = MLP(width=width, expand_ratio=mlp_expand_ratio) def forward(self, x: torch.Tensor, data: torch.Tensor): x = x + self.attn(self.ln_1(x), self.ln_2(data)) @@ -312,7 +324,7 @@ class QKVMultiheadAttention(nn.Module): k = self.k_norm(k) q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v)) - out = F.scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1) + out = scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1) return out @@ -430,207 +442,52 @@ class CrossAttentionDecoder(nn.Module): fourier_embedder: FourierEmbedder, width: int, heads: int, + mlp_expand_ratio: int = 4, + downsample_ratio: int = 1, + enable_ln_post: bool = True, qkv_bias: bool = True, qk_norm: bool = False, label_type: str = "binary" ): super().__init__() + self.enable_ln_post = enable_ln_post self.fourier_embedder = fourier_embedder - + self.downsample_ratio = downsample_ratio self.query_proj = nn.Linear(self.fourier_embedder.out_dim, width) - + if self.downsample_ratio != 1: + self.latents_proj = nn.Linear(width * downsample_ratio, width) + if not self.enable_ln_post: + qk_norm = False self.cross_attn_decoder = ResidualCrossAttentionBlock( n_data=num_latents, width=width, + mlp_expand_ratio=mlp_expand_ratio, heads=heads, qkv_bias=qkv_bias, qk_norm=qk_norm ) - self.ln_post = nn.LayerNorm(width) + if self.enable_ln_post: + self.ln_post = nn.LayerNorm(width) self.output_proj =
nn.Linear(width, out_channels) self.label_type = label_type - - def forward(self, queries: torch.FloatTensor, latents: torch.FloatTensor): - queries = self.query_proj(self.fourier_embedder(queries).to(latents.dtype)) - x = self.cross_attn_decoder(queries, latents) - x = self.ln_post(x) + self.count = 0 + + def set_cross_attention_processor(self, processor): + self.cross_attn_decoder.attn.attention.attn_processor = processor + + def set_default_cross_attention_processor(self): + self.cross_attn_decoder.attn.attention.attn_processor = CrossAttentionProcessor() + + def forward(self, queries=None, query_embeddings=None, latents=None): + if query_embeddings is None: + query_embeddings = self.query_proj(self.fourier_embedder(queries).to(latents.dtype)) + self.count += query_embeddings.shape[1] + if self.downsample_ratio != 1: + latents = self.latents_proj(latents) + x = self.cross_attn_decoder(query_embeddings, latents) + if self.enable_ln_post: + x = self.ln_post(x) occ = self.output_proj(x) return occ - - -def generate_dense_grid_points(bbox_min: np.ndarray, - bbox_max: np.ndarray, - octree_depth: int, - indexing: str = "ij", - octree_resolution: int = None, - ): - length = bbox_max - bbox_min - num_cells = np.exp2(octree_depth) - if octree_resolution is not None: - num_cells = octree_resolution - - x = np.linspace(bbox_min[0], bbox_max[0], int(num_cells) + 1, dtype=np.float32) - y = np.linspace(bbox_min[1], bbox_max[1], int(num_cells) + 1, dtype=np.float32) - z = np.linspace(bbox_min[2], bbox_max[2], int(num_cells) + 1, dtype=np.float32) - [xs, ys, zs] = np.meshgrid(x, y, z, indexing=indexing) - xyz = np.stack((xs, ys, zs), axis=-1) - xyz = xyz.reshape(-1, 3) - grid_size = [int(num_cells) + 1, int(num_cells) + 1, int(num_cells) + 1] - - return xyz, grid_size, length - - -def center_vertices(vertices): - """Translate the vertices so that bounding box is centered at zero.""" - vert_min = vertices.min(dim=0)[0] - vert_max = vertices.max(dim=0)[0] - vert_center = 0.5 * (vert_min + vert_max) - return vertices - vert_center - - -class Latent2MeshOutput: - - def __init__(self, mesh_v=None, mesh_f=None): - self.mesh_v = mesh_v - self.mesh_f = mesh_f - - -class ShapeVAE(nn.Module): - def __init__( - self, - *, - num_latents: int, - embed_dim: int, - width: int, - heads: int, - num_decoder_layers: int, - num_freqs: int = 8, - include_pi: bool = True, - qkv_bias: bool = True, - qk_norm: bool = False, - label_type: str = "binary", - drop_path_rate: float = 0.0, - scale_factor: float = 1.0, - ): - super().__init__() - self.fourier_embedder = FourierEmbedder(num_freqs=num_freqs, include_pi=include_pi) - - self.post_kl = nn.Linear(embed_dim, width) - - self.transformer = Transformer( - n_ctx=num_latents, - width=width, - layers=num_decoder_layers, - heads=heads, - qkv_bias=qkv_bias, - qk_norm=qk_norm, - drop_path_rate=drop_path_rate - ) - - self.geo_decoder = CrossAttentionDecoder( - fourier_embedder=self.fourier_embedder, - out_channels=1, - num_latents=num_latents, - width=width, - heads=heads, - qkv_bias=qkv_bias, - qk_norm=qk_norm, - label_type=label_type, - ) - - self.scale_factor = scale_factor - self.latent_shape = (num_latents, embed_dim) - - def forward(self, latents): - latents = self.post_kl(latents) - latents = self.transformer(latents) - return latents - - @torch.no_grad() - def latents2mesh( - self, - latents: torch.FloatTensor, - bounds: Union[Tuple[float], List[float], float] = 1.1, - octree_depth: int = 7, - num_chunks: int = 10000, - mc_level: float = -1 / 512, - octree_resolution: int =
None, - mc_algo: str = 'dmc', - ): - device = latents.device - - # 1. generate query points - if isinstance(bounds, float): - bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] - bbox_min = np.array(bounds[0:3]) - bbox_max = np.array(bounds[3:6]) - bbox_size = bbox_max - bbox_min - xyz_samples, grid_size, length = generate_dense_grid_points( - bbox_min=bbox_min, - bbox_max=bbox_max, - octree_depth=octree_depth, - octree_resolution=octree_resolution, - indexing="ij" - ) - xyz_samples = torch.FloatTensor(xyz_samples) - - # 2. latents to 3d volume - batch_logits = [] - batch_size = latents.shape[0] - for start in tqdm(range(0, xyz_samples.shape[0], num_chunks), - desc=f"MC Level {mc_level} Implicit Function:"): - queries = xyz_samples[start: start + num_chunks, :].to(device) - queries = queries.half() - batch_queries = repeat(queries, "p c -> b p c", b=batch_size) - - logits = self.geo_decoder(batch_queries.to(latents.dtype), latents) - if mc_level == -1: - mc_level = 0 - logits = torch.sigmoid(logits) * 2 - 1 - print(f'Training with soft labels, inference with sigmoid and marching cubes level 0.') - batch_logits.append(logits) - grid_logits = torch.cat(batch_logits, dim=1) - grid_logits = grid_logits.view((batch_size, grid_size[0], grid_size[1], grid_size[2])).float() - - # 3. extract surface - outputs = [] - for i in range(batch_size): - try: - if mc_algo == 'mc': - vertices, faces, normals, _ = measure.marching_cubes( - grid_logits[i].cpu().numpy(), - mc_level, - method="lewiner" - ) - vertices = vertices / grid_size * bbox_size + bbox_min - elif mc_algo == 'dmc': - if not hasattr(self, 'dmc'): - try: - from diso import DiffDMC - except: - raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'") - self.dmc = DiffDMC(dtype=torch.float32).to(device) - octree_resolution = 2 ** octree_depth if octree_resolution is None else octree_resolution - sdf = -grid_logits[i] / octree_resolution - verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True) - verts = center_vertices(verts) - vertices = verts.detach().cpu().numpy() - faces = faces.detach().cpu().numpy()[:, ::-1] - else: - raise ValueError(f"mc_algo {mc_algo} not supported.") - - outputs.append( - Latent2MeshOutput( - mesh_v=vertices.astype(np.float32), - mesh_f=np.ascontiguousarray(faces) - ) - ) - - except ValueError: - outputs.append(None) - except RuntimeError: - outputs.append(None) - - return outputs diff --git a/hy3dgen/shapegen/models/autoencoders/attention_processors.py b/hy3dgen/shapegen/models/autoencoders/attention_processors.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b232eb0d16d67b9598a7e49f57d6616f2e5bed --- /dev/null +++ b/hy3dgen/shapegen/models/autoencoders/attention_processors.py @@ -0,0 +1,96 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations.
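Both attention_blocks.py (USE_SAGEATTN, above) and the new attention_processors.py (CA_USE_SAGEATTN, below) substitute SageAttention for torch.nn.functional.scaled_dot_product_attention at import time. A hedged opt-in sketch, assuming the optional sageattention package is installed:

    import os
    # The flags are read at module import time, so set them before importing hy3dgen.
    os.environ['USE_SAGEATTN'] = '1'      # attention blocks and the DiT denoiser
    os.environ['CA_USE_SAGEATTN'] = '1'   # cross-attention processors (this file)
    import hy3dgen.shapegen  # raises ImportError if sageattention is not installed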
+ +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +import os + +import torch +import torch.nn.functional as F + +scaled_dot_product_attention = F.scaled_dot_product_attention +if os.environ.get('CA_USE_SAGEATTN', '0') == '1': + try: + from sageattention import sageattn + except ImportError: + raise ImportError('Please install the package "sageattention" to use CA_USE_SAGEATTN.') + scaled_dot_product_attention = sageattn + + +class CrossAttentionProcessor: + def __call__(self, attn, q, k, v): + out = scaled_dot_product_attention(q, k, v) + return out + + +class FlashVDMCrossAttentionProcessor: + def __init__(self, topk=None): + self.topk = topk + + def __call__(self, attn, q, k, v): + if k.shape[-2] == 3072: + topk = 1024 + elif k.shape[-2] == 512: + topk = 256 + else: + topk = k.shape[-2] // 3 + + if self.topk is True: + q1 = q[:, :, ::100, :] + sim = q1 @ k.transpose(-1, -2) + sim = torch.mean(sim, -2) + topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1) + topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1]) + v0 = torch.gather(v, dim=-2, index=topk_ind) + k0 = torch.gather(k, dim=-2, index=topk_ind) + out = scaled_dot_product_attention(q, k0, v0) + elif self.topk is False: + out = scaled_dot_product_attention(q, k, v) + else: + idx, counts = self.topk + start = 0 + outs = [] + for grid_coord, count in zip(idx, counts): + end = start + count + q_chunk = q[:, :, start:end, :] + k0, v0 = self.select_topkv(q_chunk, k, v, topk) + out = scaled_dot_product_attention(q_chunk, k0, v0) + outs.append(out) + start += count + out = torch.cat(outs, dim=-2) + self.topk = False + return out + + def select_topkv(self, q_chunk, k, v, topk): + q1 = q_chunk[:, :, ::50, :] + sim = q1 @ k.transpose(-1, -2) + sim = torch.mean(sim, -2) + topk_ind = torch.topk(sim, dim=-1, k=topk).indices.squeeze(-2).unsqueeze(-1) + topk_ind = topk_ind.expand(-1, -1, -1, v.shape[-1]) + v0 = torch.gather(v, dim=-2, index=topk_ind) + k0 = torch.gather(k, dim=-2, index=topk_ind) + return k0, v0 + + +class FlashVDMTopMCrossAttentionProcessor(FlashVDMCrossAttentionProcessor): + def select_topkv(self, q_chunk, k, v, topk): + q1 = q_chunk[:, :, ::30, :] + sim = q1 @ k.transpose(-1, -2) + # sim = sim.to(torch.float32) + sim = sim.softmax(-1) + sim = torch.mean(sim, 1) + activated_token = torch.where(sim > 1e-6)[2] + index = torch.unique(activated_token, return_counts=True)[0].unsqueeze(0).unsqueeze(0).unsqueeze(-1) + index = index.expand(-1, v.shape[1], -1, v.shape[-1]) + v0 = torch.gather(v, dim=-2, index=index) + k0 = torch.gather(k, dim=-2, index=index) + return k0, v0 diff --git a/hy3dgen/shapegen/models/autoencoders/model.py b/hy3dgen/shapegen/models/autoencoders/model.py new file mode 100644 index 0000000000000000000000000000000000000000..76f78da2445470f3614b3cd9d75a3133bafc3a4e --- /dev/null +++ b/hy3dgen/shapegen/models/autoencoders/model.py @@ -0,0 +1,189 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below.
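The FlashVDM processor above is driven through its topk field: True selects a top-k key/value subset from a strided query probe, False falls back to plain scaled dot-product attention, and an (indices, counts) pair triggers per-grid-chunk selection (see FlashVDMVolumeDecoding later in this patch). A self-contained shape check with dummy tensors; all sizes are illustrative:

    import torch
    from hy3dgen.shapegen.models.autoencoders.attention_processors import FlashVDMCrossAttentionProcessor

    proc = FlashVDMCrossAttentionProcessor()
    q = torch.randn(1, 8, 4000, 64)   # (batch, heads, query points, head_dim)
    k = torch.randn(1, 8, 3072, 64)   # 3072 latent tokens -> the processor keeps the top 1024
    v = torch.randn(1, 8, 3072, 64)
    proc.topk = True                  # coarse pass: rank keys by mean similarity to every 100th query
    out = proc(None, q, k, v)         # the `attn` argument is accepted but unused by this processor
    assert out.shape == q.shape       # proc.topk is reset to False after the call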
+# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +import os + +import torch +import torch.nn as nn +import yaml + +from .attention_blocks import FourierEmbedder, Transformer, CrossAttentionDecoder +from .surface_extractors import MCSurfaceExtractor, SurfaceExtractors +from .volume_decoders import VanillaVolumeDecoder, FlashVDMVolumeDecoding, HierarchicalVolumeDecoding +from ...utils import logger, synchronize_timer, smart_load_model + + +class VectsetVAE(nn.Module): + + @classmethod + @synchronize_timer('VectsetVAE Model Loading') + def from_single_file( + cls, + ckpt_path, + config_path, + device='cuda', + dtype=torch.float16, + use_safetensors=None, + **kwargs, + ): + # load config + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + + # load ckpt + if use_safetensors: + ckpt_path = ckpt_path.replace('.ckpt', '.safetensors') + if not os.path.exists(ckpt_path): + raise FileNotFoundError(f"Model file {ckpt_path} not found") + + logger.info(f"Loading model from {ckpt_path}") + if use_safetensors: + import safetensors.torch + ckpt = safetensors.torch.load_file(ckpt_path, device='cpu') + else: + ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True) + + model_kwargs = config['params'] + model_kwargs.update(kwargs) + + model = cls(**model_kwargs) + model.load_state_dict(ckpt) + model.to(device=device, dtype=dtype) + return model + + @classmethod + def from_pretrained( + cls, + model_path, + device='cuda', + dtype=torch.float16, + use_safetensors=True, + variant='fp16', + subfolder='hunyuan3d-vae-v2-0', + **kwargs, + ): + config_path, ckpt_path = smart_load_model( + model_path, + subfolder=subfolder, + use_safetensors=use_safetensors, + variant=variant + ) + + return cls.from_single_file( + ckpt_path, + config_path, + device=device, + dtype=dtype, + use_safetensors=use_safetensors, + **kwargs + ) + + def __init__( + self, + volume_decoder=None, + surface_extractor=None + ): + super().__init__() + if volume_decoder is None: + volume_decoder = VanillaVolumeDecoder() + if surface_extractor is None: + surface_extractor = MCSurfaceExtractor() + self.volume_decoder = volume_decoder + self.surface_extractor = surface_extractor + + def latents2mesh(self, latents: torch.FloatTensor, **kwargs): + with synchronize_timer('Volume decoding'): + grid_logits = self.volume_decoder(latents, self.geo_decoder, **kwargs) + with synchronize_timer('Surface extraction'): + outputs = self.surface_extractor(grid_logits, **kwargs) + return outputs + + def enable_flashvdm_decoder( + self, + enabled: bool = True, + adaptive_kv_selection=True, + topk_mode='mean', + mc_algo='dmc', + ): + if enabled: + if adaptive_kv_selection: + self.volume_decoder = FlashVDMVolumeDecoding(topk_mode) + else: + self.volume_decoder = HierarchicalVolumeDecoding() + if mc_algo not in
SurfaceExtractors.keys(): + raise ValueError(f'Unsupported mc_algo {mc_algo}, available: {list(SurfaceExtractors.keys())}') + self.surface_extractor = SurfaceExtractors[mc_algo]() + else: + self.volume_decoder = VanillaVolumeDecoder() + self.surface_extractor = MCSurfaceExtractor() + + +class ShapeVAE(VectsetVAE): + def __init__( + self, + *, + num_latents: int, + embed_dim: int, + width: int, + heads: int, + num_decoder_layers: int, + geo_decoder_downsample_ratio: int = 1, + geo_decoder_mlp_expand_ratio: int = 4, + geo_decoder_ln_post: bool = True, + num_freqs: int = 8, + include_pi: bool = True, + qkv_bias: bool = True, + qk_norm: bool = False, + label_type: str = "binary", + drop_path_rate: float = 0.0, + scale_factor: float = 1.0, + ): + super().__init__() + self.geo_decoder_ln_post = geo_decoder_ln_post + + self.fourier_embedder = FourierEmbedder(num_freqs=num_freqs, include_pi=include_pi) + + self.post_kl = nn.Linear(embed_dim, width) + + self.transformer = Transformer( + n_ctx=num_latents, + width=width, + layers=num_decoder_layers, + heads=heads, + qkv_bias=qkv_bias, + qk_norm=qk_norm, + drop_path_rate=drop_path_rate + ) + + self.geo_decoder = CrossAttentionDecoder( + fourier_embedder=self.fourier_embedder, + out_channels=1, + num_latents=num_latents, + mlp_expand_ratio=geo_decoder_mlp_expand_ratio, + downsample_ratio=geo_decoder_downsample_ratio, + enable_ln_post=self.geo_decoder_ln_post, + width=width // geo_decoder_downsample_ratio, + heads=heads // geo_decoder_downsample_ratio, + qkv_bias=qkv_bias, + qk_norm=qk_norm, + label_type=label_type, + ) + + self.scale_factor = scale_factor + self.latent_shape = (num_latents, embed_dim) + + def forward(self, latents): + latents = self.post_kl(latents) + latents = self.transformer(latents) + return latents diff --git a/hy3dgen/shapegen/models/autoencoders/surface_extractors.py b/hy3dgen/shapegen/models/autoencoders/surface_extractors.py new file mode 100644 index 0000000000000000000000000000000000000000..f4d8f63ab2cc56ddd2171f299519b226d77f2eba --- /dev/null +++ b/hy3dgen/shapegen/models/autoencoders/surface_extractors.py @@ -0,0 +1,100 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
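VectsetVAE gives the VAE the same from_pretrained/from_single_file loading surface as the pipeline, and ShapeVAE wires the new geo_decoder knobs through to CrossAttentionDecoder. A hedged usage sketch: it assumes a CUDA device, access to the tencent/Hunyuan3D-2 weights, and diso for the 'dmc' extractor; the random latents merely stand in for a DiT sample:

    import torch
    from hy3dgen.shapegen.models.autoencoders import ShapeVAE

    vae = ShapeVAE.from_pretrained('tencent/Hunyuan3D-2')   # config + fp16 safetensors via smart_load_model
    vae.enable_flashvdm_decoder(enabled=True, topk_mode='mean', mc_algo='dmc')
    latents = torch.randn(1, *vae.latent_shape, dtype=torch.float16, device='cuda')
    latents = vae(latents)                                  # post_kl + transformer
    meshes = vae.latents2mesh(latents, octree_resolution=256, mc_level=0.0, bounds=1.01, num_chunks=8000)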
+ +from typing import Union, Tuple, List + +import numpy as np +import torch +from skimage import measure + + +class Latent2MeshOutput: + + def __init__(self, mesh_v=None, mesh_f=None): + self.mesh_v = mesh_v + self.mesh_f = mesh_f + + +def center_vertices(vertices): + """Translate the vertices so that bounding box is centered at zero.""" + vert_min = vertices.min(dim=0)[0] + vert_max = vertices.max(dim=0)[0] + vert_center = 0.5 * (vert_min + vert_max) + return vertices - vert_center + + +class SurfaceExtractor: + def _compute_box_stat(self, bounds: Union[Tuple[float], List[float], float], octree_resolution: int): + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + + bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6]) + bbox_size = bbox_max - bbox_min + grid_size = [int(octree_resolution) + 1, int(octree_resolution) + 1, int(octree_resolution) + 1] + return grid_size, bbox_min, bbox_size + + def run(self, *args, **kwargs): + raise NotImplementedError + + def __call__(self, grid_logits, **kwargs): + outputs = [] + for i in range(grid_logits.shape[0]): + try: + vertices, faces = self.run(grid_logits[i], **kwargs) + vertices = vertices.astype(np.float32) + faces = np.ascontiguousarray(faces) + outputs.append(Latent2MeshOutput(mesh_v=vertices, mesh_f=faces)) + + except Exception: + import traceback + traceback.print_exc() + outputs.append(None) + + return outputs + + +class MCSurfaceExtractor(SurfaceExtractor): + def run(self, grid_logit, *, mc_level, bounds, octree_resolution, **kwargs): + vertices, faces, normals, _ = measure.marching_cubes( + grid_logit.cpu().numpy(), + mc_level, + method="lewiner" + ) + grid_size, bbox_min, bbox_size = self._compute_box_stat(bounds, octree_resolution) + vertices = vertices / grid_size * bbox_size + bbox_min + return vertices, faces + + +class DMCSurfaceExtractor(SurfaceExtractor): + def run(self, grid_logit, *, octree_resolution, **kwargs): + device = grid_logit.device + if not hasattr(self, 'dmc'): + try: + from diso import DiffDMC + except ImportError: + raise ImportError("Please install diso via `pip install diso`, or set mc_algo to 'mc'") + self.dmc = DiffDMC(dtype=torch.float32).to(device) + sdf = -grid_logit / octree_resolution + sdf = sdf.to(torch.float32).contiguous() + verts, faces = self.dmc(sdf, deform=None, return_quads=False, normalize=True) + verts = center_vertices(verts) + vertices = verts.detach().cpu().numpy() + faces = faces.detach().cpu().numpy()[:, ::-1] + return vertices, faces + + +SurfaceExtractors = { + 'mc': MCSurfaceExtractor, + 'dmc': DMCSurfaceExtractor, +} diff --git a/hy3dgen/shapegen/models/autoencoders/volume_decoders.py b/hy3dgen/shapegen/models/autoencoders/volume_decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..d7bfd84647786b43d7815c788931326a305c7dca --- /dev/null +++ b/hy3dgen/shapegen/models/autoencoders/volume_decoders.py @@ -0,0 +1,435 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations.
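The extractors above share a registry keyed by mc_algo, and each run() returns (vertices, faces) that __call__ wraps into Latent2MeshOutput. A small self-contained check on a synthetic sphere SDF (all sizes illustrative; 'dmc' would additionally need `pip install diso`):

    import torch
    from hy3dgen.shapegen.models.autoencoders.surface_extractors import SurfaceExtractors

    coords = torch.linspace(-1.01, 1.01, 65)
    grid = torch.stack(torch.meshgrid(coords, coords, coords, indexing='ij'), dim=-1)
    logits = (0.5 - grid.norm(dim=-1)).unsqueeze(0)    # positive inside a sphere of radius 0.5
    extractor = SurfaceExtractors['mc']()
    out = extractor(logits, mc_level=0.0, bounds=1.01, octree_resolution=64)[0]
    print(out.mesh_v.shape, out.mesh_f.shape)          # vertex and face arrays of the extracted mesh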
+ +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +from typing import Union, Tuple, List, Callable + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import repeat +from tqdm import tqdm + +from .attention_blocks import CrossAttentionDecoder +from .attention_processors import FlashVDMCrossAttentionProcessor, FlashVDMTopMCrossAttentionProcessor +from ...utils import logger + + +def extract_near_surface_volume_fn(input_tensor: torch.Tensor, alpha: float): + device = input_tensor.device + D = input_tensor.shape[0] + signed_val = 0.0 + + # apply the offset and mask out invalid values + val = input_tensor + alpha + valid_mask = val > -9000 # -9000 is assumed to mark invalid cells + + # neighbor-fetching helper (keeps dimensions consistent) + def get_neighbor(t, shift, axis): + """Shift along the given axis while keeping dimensions consistent.""" + if shift == 0: + return t.clone() + + # choose the padding axis (input is [D, D, D], corresponding to the z, y, x axes) + pad_dims = [0, 0, 0, 0, 0, 0] # format: [x_before, x_after, y_before, y_after, z_before, z_after] + + # set the padding according to the axis + if axis == 0: # x axis (last dimension) + pad_idx = 0 if shift > 0 else 1 + pad_dims[pad_idx] = abs(shift) + elif axis == 1: # y axis (middle dimension) + pad_idx = 2 if shift > 0 else 3 + pad_dims[pad_idx] = abs(shift) + elif axis == 2: # z axis (first dimension) + pad_idx = 4 if shift > 0 else 5 + pad_dims[pad_idx] = abs(shift) + + # pad (add batch and channel dims so the tensor fits F.pad) + padded = F.pad(t.unsqueeze(0).unsqueeze(0), pad_dims[::-1], mode='replicate') # reversed to match F.pad's order + + # build dynamic slice indices + slice_dims = [slice(None)] * 3 # initialize to full slices + if axis == 0: # x axis (dim=2) + if shift > 0: + slice_dims[0] = slice(shift, None) + else: + slice_dims[0] = slice(None, shift) + elif axis == 1: # y axis (dim=1) + if shift > 0: + slice_dims[1] = slice(shift, None) + else: + slice_dims[1] = slice(None, shift) + elif axis == 2: # z axis (dim=0) + if shift > 0: + slice_dims[2] = slice(shift, None) + else: + slice_dims[2] = slice(None, shift) + + # apply the slice and restore dimensions + padded = padded.squeeze(0).squeeze(0) + sliced = padded[slice_dims] + return sliced + + # fetch neighbors in every direction (dimensions stay consistent) + left = get_neighbor(val, 1, axis=0) # x direction + right = get_neighbor(val, -1, axis=0) + back = get_neighbor(val, 1, axis=1) # y direction + front = get_neighbor(val, -1, axis=1) + down = get_neighbor(val, 1, axis=2) # z direction + up = get_neighbor(val, -1, axis=2) + + # handle invalid values at the boundary (where keeps dimensions consistent) + def safe_where(neighbor): + return torch.where(neighbor > -9000, neighbor, val) + + left = safe_where(left) + right = safe_where(right) + back = safe_where(back) + front = safe_where(front) + down = safe_where(down) + up = safe_where(up) + + # compute sign consistency (cast to float32 for precision) + sign = torch.sign(val.to(torch.float32)) + neighbors_sign = torch.stack([ + torch.sign(left.to(torch.float32)), + torch.sign(right.to(torch.float32)), + torch.sign(back.to(torch.float32)), + torch.sign(front.to(torch.float32)), + torch.sign(down.to(torch.float32)), + torch.sign(up.to(torch.float32)) + ], dim=0) + + # check whether all signs agree + same_sign = torch.all(neighbors_sign == sign, dim=0) + + # build the final mask + mask = (~same_sign).to(torch.int32) + return mask * valid_mask.to(torch.int32) + + +def generate_dense_grid_points( + bbox_min: np.ndarray, + bbox_max: np.ndarray, + octree_resolution: int, + indexing: str = "ij", +): + length = bbox_max - bbox_min + num_cells = octree_resolution + + x = np.linspace(bbox_min[0], bbox_max[0],
int(num_cells) + 1, dtype=np.float32) + y = np.linspace(bbox_min[1], bbox_max[1], int(num_cells) + 1, dtype=np.float32) + z = np.linspace(bbox_min[2], bbox_max[2], int(num_cells) + 1, dtype=np.float32) + [xs, ys, zs] = np.meshgrid(x, y, z, indexing=indexing) + xyz = np.stack((xs, ys, zs), axis=-1) + grid_size = [int(num_cells) + 1, int(num_cells) + 1, int(num_cells) + 1] + + return xyz, grid_size, length + + +class VanillaVolumeDecoder: + @torch.no_grad() + def __call__( + self, + latents: torch.FloatTensor, + geo_decoder: Callable, + bounds: Union[Tuple[float], List[float], float] = 1.01, + num_chunks: int = 10000, + octree_resolution: int = None, + enable_pbar: bool = True, + **kwargs, + ): + device = latents.device + dtype = latents.dtype + batch_size = latents.shape[0] + + # 1. generate query points + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + + bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6]) + xyz_samples, grid_size, length = generate_dense_grid_points( + bbox_min=bbox_min, + bbox_max=bbox_max, + octree_resolution=octree_resolution, + indexing="ij" + ) + xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype).contiguous().reshape(-1, 3) + + # 2. latents to 3d volume + batch_logits = [] + for start in tqdm(range(0, xyz_samples.shape[0], num_chunks), desc=f"Volume Decoding", + disable=not enable_pbar): + chunk_queries = xyz_samples[start: start + num_chunks, :] + chunk_queries = repeat(chunk_queries, "p c -> b p c", b=batch_size) + logits = geo_decoder(queries=chunk_queries, latents=latents) + batch_logits.append(logits) + + grid_logits = torch.cat(batch_logits, dim=1) + grid_logits = grid_logits.view((batch_size, *grid_size)).float() + + return grid_logits + + +class HierarchicalVolumeDecoding: + @torch.no_grad() + def __call__( + self, + latents: torch.FloatTensor, + geo_decoder: Callable, + bounds: Union[Tuple[float], List[float], float] = 1.01, + num_chunks: int = 10000, + mc_level: float = 0.0, + octree_resolution: int = None, + min_resolution: int = 63, + enable_pbar: bool = True, + **kwargs, + ): + device = latents.device + dtype = latents.dtype + + resolutions = [] + if octree_resolution < min_resolution: + resolutions.append(octree_resolution) + while octree_resolution >= min_resolution: + resolutions.append(octree_resolution) + octree_resolution = octree_resolution // 2 + resolutions.reverse() + + # 1. generate query points + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + bbox_min = np.array(bounds[0:3]) + bbox_max = np.array(bounds[3:6]) + bbox_size = bbox_max - bbox_min + + xyz_samples, grid_size, length = generate_dense_grid_points( + bbox_min=bbox_min, + bbox_max=bbox_max, + octree_resolution=resolutions[0], + indexing="ij" + ) + + dilate = nn.Conv3d(1, 1, 3, padding=1, bias=False, device=device, dtype=dtype) + dilate.weight = torch.nn.Parameter(torch.ones(dilate.weight.shape, dtype=dtype, device=device)) + + grid_size = np.array(grid_size) + xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype).contiguous().reshape(-1, 3) + + # 2. 
latents to 3d volume + batch_logits = [] + batch_size = latents.shape[0] + for start in tqdm(range(0, xyz_samples.shape[0], num_chunks), + desc=f"Hierarchical Volume Decoding [r{resolutions[0] + 1}]"): + queries = xyz_samples[start: start + num_chunks, :] + batch_queries = repeat(queries, "p c -> b p c", b=batch_size) + logits = geo_decoder(queries=batch_queries, latents=latents) + batch_logits.append(logits) + + grid_logits = torch.cat(batch_logits, dim=1).view((batch_size, grid_size[0], grid_size[1], grid_size[2])) + + for octree_depth_now in resolutions[1:]: + grid_size = np.array([octree_depth_now + 1] * 3) + resolution = bbox_size / octree_depth_now + next_index = torch.zeros(tuple(grid_size), dtype=dtype, device=device) + next_logits = torch.full(next_index.shape, -10000., dtype=dtype, device=device) + curr_points = extract_near_surface_volume_fn(grid_logits.squeeze(0), mc_level) + curr_points += grid_logits.squeeze(0).abs() < 0.95 + + if octree_depth_now == resolutions[-1]: + expand_num = 0 + else: + expand_num = 1 + for i in range(expand_num): + curr_points = dilate(curr_points.unsqueeze(0).to(dtype)).squeeze(0) + (cidx_x, cidx_y, cidx_z) = torch.where(curr_points > 0) + next_index[cidx_x * 2, cidx_y * 2, cidx_z * 2] = 1 + for i in range(2 - expand_num): + next_index = dilate(next_index.unsqueeze(0)).squeeze(0) + nidx = torch.where(next_index > 0) + + next_points = torch.stack(nidx, dim=1) + next_points = (next_points * torch.tensor(resolution, dtype=next_points.dtype, device=device) + + torch.tensor(bbox_min, dtype=next_points.dtype, device=device)) + batch_logits = [] + for start in tqdm(range(0, next_points.shape[0], num_chunks), + desc=f"Hierarchical Volume Decoding [r{octree_depth_now + 1}]"): + queries = next_points[start: start + num_chunks, :] + batch_queries = repeat(queries, "p c -> b p c", b=batch_size) + logits = geo_decoder(queries=batch_queries.to(latents.dtype), latents=latents) + batch_logits.append(logits) + grid_logits = torch.cat(batch_logits, dim=1) + next_logits[nidx] = grid_logits[0, ..., 0] + grid_logits = next_logits.unsqueeze(0) + grid_logits[grid_logits == -10000.] = float('nan') + + return grid_logits + + +class FlashVDMVolumeDecoding: + def __init__(self, topk_mode='mean'): + if topk_mode not in ['mean', 'merge']: + raise ValueError(f'Unsupported topk_mode {topk_mode}, available: {["mean", "merge"]}') + + if topk_mode == 'mean': + self.processor = FlashVDMCrossAttentionProcessor() + else: + self.processor = FlashVDMTopMCrossAttentionProcessor() + + @torch.no_grad() + def __call__( + self, + latents: torch.FloatTensor, + geo_decoder: CrossAttentionDecoder, + bounds: Union[Tuple[float], List[float], float] = 1.01, + num_chunks: int = 10000, + mc_level: float = 0.0, + octree_resolution: int = None, + min_resolution: int = 63, + mini_grid_num: int = 4, + enable_pbar: bool = True, + **kwargs, + ): + processor = self.processor + geo_decoder.set_cross_attention_processor(processor) + + device = latents.device + dtype = latents.dtype + + resolutions = [] + if octree_resolution < min_resolution: + resolutions.append(octree_resolution) + while octree_resolution >= min_resolution: + resolutions.append(octree_resolution) + octree_resolution = octree_resolution // 2 + resolutions.reverse() + resolutions[0] = round(resolutions[0] / mini_grid_num) * mini_grid_num - 1 + for i, resolution in enumerate(resolutions[1:]): + resolutions[i + 1] = resolutions[0] * 2 ** (i + 1) + + logger.info(f"FlashVDMVolumeDecoding Resolution: {resolutions}") + + # 1. 
generate query points + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + bbox_min = np.array(bounds[0:3]) + bbox_max = np.array(bounds[3:6]) + bbox_size = bbox_max - bbox_min + + xyz_samples, grid_size, length = generate_dense_grid_points( + bbox_min=bbox_min, + bbox_max=bbox_max, + octree_resolution=resolutions[0], + indexing="ij" + ) + + dilate = nn.Conv3d(1, 1, 3, padding=1, bias=False, device=device, dtype=dtype) + dilate.weight = torch.nn.Parameter(torch.ones(dilate.weight.shape, dtype=dtype, device=device)) + + grid_size = np.array(grid_size) + + # 2. latents to 3d volume + xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype) + batch_size = latents.shape[0] + mini_grid_size = xyz_samples.shape[0] // mini_grid_num + xyz_samples = xyz_samples.view( + mini_grid_num, mini_grid_size, + mini_grid_num, mini_grid_size, + mini_grid_num, mini_grid_size, 3 + ).permute( + 0, 2, 4, 1, 3, 5, 6 + ).reshape( + -1, mini_grid_size * mini_grid_size * mini_grid_size, 3 + ) + batch_logits = [] + num_batchs = max(num_chunks // xyz_samples.shape[1], 1) + for start in tqdm(range(0, xyz_samples.shape[0], num_batchs), + desc=f"FlashVDM Volume Decoding", disable=not enable_pbar): + queries = xyz_samples[start: start + num_batchs, :] + batch = queries.shape[0] + batch_latents = repeat(latents.squeeze(0), "p c -> b p c", b=batch) + processor.topk = True + logits = geo_decoder(queries=queries, latents=batch_latents) + batch_logits.append(logits) + grid_logits = torch.cat(batch_logits, dim=0).reshape( + mini_grid_num, mini_grid_num, mini_grid_num, + mini_grid_size, mini_grid_size, + mini_grid_size + ).permute(0, 3, 1, 4, 2, 5).contiguous().view( + (batch_size, grid_size[0], grid_size[1], grid_size[2]) + ) + + for octree_depth_now in resolutions[1:]: + grid_size = np.array([octree_depth_now + 1] * 3) + resolution = bbox_size / octree_depth_now + next_index = torch.zeros(tuple(grid_size), dtype=dtype, device=device) + next_logits = torch.full(next_index.shape, -10000., dtype=dtype, device=device) + curr_points = extract_near_surface_volume_fn(grid_logits.squeeze(0), mc_level) + curr_points += grid_logits.squeeze(0).abs() < 0.95 + + if octree_depth_now == resolutions[-1]: + expand_num = 0 + else: + expand_num = 1 + for i in range(expand_num): + curr_points = dilate(curr_points.unsqueeze(0).to(dtype)).squeeze(0) + (cidx_x, cidx_y, cidx_z) = torch.where(curr_points > 0) + + next_index[cidx_x * 2, cidx_y * 2, cidx_z * 2] = 1 + for i in range(2 - expand_num): + next_index = dilate(next_index.unsqueeze(0)).squeeze(0) + nidx = torch.where(next_index > 0) + + next_points = torch.stack(nidx, dim=1) + next_points = (next_points * torch.tensor(resolution, dtype=torch.float32, device=device) + + torch.tensor(bbox_min, dtype=torch.float32, device=device)) + + query_grid_num = 6 + min_val = next_points.min(axis=0).values + max_val = next_points.max(axis=0).values + vol_queries_index = (next_points - min_val) / (max_val - min_val) * (query_grid_num - 0.001) + index = torch.floor(vol_queries_index).long() + index = index[..., 0] * (query_grid_num ** 2) + index[..., 1] * query_grid_num + index[..., 2] + index = index.sort() + next_points = next_points[index.indices].unsqueeze(0).contiguous() + unique_values = torch.unique(index.values, return_counts=True) + grid_logits = torch.zeros((next_points.shape[1]), dtype=latents.dtype, device=latents.device) + input_grid = [[], []] + logits_grid_list = [] + start_num = 0 + sum_num = 0 + for grid_index, count in 
zip(unique_values[0].cpu().tolist(), unique_values[1].cpu().tolist()): + if sum_num + count < num_chunks or sum_num == 0: + sum_num += count + input_grid[0].append(grid_index) + input_grid[1].append(count) + else: + processor.topk = input_grid + logits_grid = geo_decoder(queries=next_points[:, start_num:start_num + sum_num], latents=latents) + start_num = start_num + sum_num + logits_grid_list.append(logits_grid) + input_grid = [[grid_index], [count]] + sum_num = count + if sum_num > 0: + processor.topk = input_grid + logits_grid = geo_decoder(queries=next_points[:, start_num:start_num + sum_num], latents=latents) + logits_grid_list.append(logits_grid) + logits_grid = torch.cat(logits_grid_list, dim=1) + grid_logits[index.indices] = logits_grid.squeeze(0).squeeze(-1) + next_logits[nidx] = grid_logits + grid_logits = next_logits.unsqueeze(0) + + grid_logits[grid_logits == -10000.] = float('nan') + + return grid_logits diff --git a/hy3dgen/shapegen/models/conditioner.py b/hy3dgen/shapegen/models/conditioner.py old mode 100644 new mode 100755 index 1af4c0cc440a193167c0837621c3494242b95f3d..d0d848c3b0d82eba4e4453d2c266f8fa7a1aeaaa --- a/hy3dgen/shapegen/models/conditioner.py +++ b/hy3dgen/shapegen/models/conditioner.py @@ -22,6 +22,7 @@ # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +import numpy as np import torch import torch.nn as nn from torchvision import transforms @@ -33,6 +34,26 @@ from transformers import ( ) +def get_1d_sincos_pos_embed_from_grid(embed_dim, pos): + """ + embed_dim: output dimension for each position + pos: a list of positions to be encoded: size (M,) + out: (M, D) + """ + assert embed_dim % 2 == 0 + omega = np.arange(embed_dim // 2, dtype=np.float64) + omega /= embed_dim / 2. + omega = 1. 
/ 10000 ** omega # (D/2,) + + pos = pos.reshape(-1) # (M,) + out = np.einsum('m,d->md', pos, omega) # (M, D/2), outer product + + emb_sin = np.sin(out) # (M, D/2) + emb_cos = np.cos(out) # (M, D/2) + + return np.concatenate([emb_sin, emb_cos], axis=1) + + class ImageEncoder(nn.Module): def __init__( self, @@ -67,7 +88,7 @@ class ImageEncoder(nn.Module): ] ) - def forward(self, image, mask=None, value_range=(-1, 1)): + def forward(self, image, mask=None, value_range=(-1, 1), **kwargs): if value_range is not None: low, high = value_range image = (image - low) / (high - low) @@ -82,7 +103,7 @@ class ImageEncoder(nn.Module): return last_hidden_state - def unconditional_embedding(self, batch_size): + def unconditional_embedding(self, batch_size, **kwargs): device = next(self.model.parameters()).device dtype = next(self.model.parameters()).dtype zero = torch.zeros( @@ -110,11 +131,82 @@ class DinoImageEncoder(ImageEncoder): std = [0.229, 0.224, 0.225] +class DinoImageEncoderMV(DinoImageEncoder): + def __init__( + self, + version=None, + config=None, + use_cls_token=True, + image_size=224, + view_num=4, + **kwargs, + ): + super().__init__(version, config, use_cls_token, image_size, **kwargs) + self.view_num = view_num + self.num_patches = self.num_patches + pos = np.arange(self.view_num, dtype=np.float32) + view_embedding = torch.from_numpy( + get_1d_sincos_pos_embed_from_grid(self.model.config.hidden_size, pos)).float() + + view_embedding = view_embedding.unsqueeze(1).repeat(1, self.num_patches, 1) + self.view_embed = view_embedding.unsqueeze(0) + + def forward(self, image, mask=None, value_range=(-1, 1), view_idxs=None): + if value_range is not None: + low, high = value_range + image = (image - low) / (high - low) + + image = image.to(self.model.device, dtype=self.model.dtype) + + bs, num_views, c, h, w = image.shape + image = image.view(bs * num_views, c, h, w) + + inputs = self.transform(image) + outputs = self.model(inputs) + + last_hidden_state = outputs.last_hidden_state + last_hidden_state = last_hidden_state.view( + bs, num_views, last_hidden_state.shape[-2], + last_hidden_state.shape[-1] + ) + + view_embedding = self.view_embed.to(last_hidden_state.dtype).to(last_hidden_state.device) + if view_idxs is not None: + assert len(view_idxs) == bs + view_embeddings = [] + for i in range(bs): + view_idx = view_idxs[i] + assert num_views == len(view_idx) + view_embeddings.append(self.view_embed[:, view_idx, ...]) + view_embedding = torch.cat(view_embeddings, 0).to(last_hidden_state.dtype).to(last_hidden_state.device) + + if num_views != self.view_num: + view_embedding = view_embedding[:, :num_views, ...] 
+ last_hidden_state = last_hidden_state + view_embedding + last_hidden_state = last_hidden_state.view(bs, num_views * last_hidden_state.shape[-2], + last_hidden_state.shape[-1]) + return last_hidden_state + + def unconditional_embedding(self, batch_size, view_idxs=None, **kwargs): + device = next(self.model.parameters()).device + dtype = next(self.model.parameters()).dtype + zero = torch.zeros( + batch_size, + self.num_patches * len(view_idxs[0]), + self.model.config.hidden_size, + device=device, + dtype=dtype, + ) + return zero + + +def build_image_encoder(config): + if config['type'] == 'CLIPImageEncoder': + return CLIPImageEncoder(**config['kwargs']) + elif config['type'] == 'DinoImageEncoder': + return DinoImageEncoder(**config['kwargs']) + elif config['type'] == 'DinoImageEncoderMV': + return DinoImageEncoderMV(**config['kwargs']) + else: + raise ValueError(f'Unknown image encoder type: {config["type"]}') @@ -129,17 +221,17 @@ class DualImageEncoder(nn.Module): self.main_image_encoder = build_image_encoder(main_image_encoder) self.additional_image_encoder = build_image_encoder(additional_image_encoder) - def forward(self, image, mask=None): + def forward(self, image, mask=None, **kwargs): outputs = { - 'main': self.main_image_encoder(image, mask=mask), - 'additional': self.additional_image_encoder(image, mask=mask), + 'main': self.main_image_encoder(image, mask=mask, **kwargs), + 'additional': self.additional_image_encoder(image, mask=mask, **kwargs), } return outputs - def unconditional_embedding(self, batch_size): + def unconditional_embedding(self, batch_size, **kwargs): outputs = { - 'main': self.main_image_encoder.unconditional_embedding(batch_size), - 'additional': self.additional_image_encoder.unconditional_embedding(batch_size), + 'main': self.main_image_encoder.unconditional_embedding(batch_size, **kwargs), + 'additional': self.additional_image_encoder.unconditional_embedding(batch_size, **kwargs), } return outputs @@ -152,14 +244,14 @@ class SingleImageEncoder(nn.Module): super().__init__() self.main_image_encoder = build_image_encoder(main_image_encoder) - def forward(self, image, mask=None): + def forward(self, image, mask=None, **kwargs): outputs = { - 'main': self.main_image_encoder(image, mask=mask), + 'main': self.main_image_encoder(image, mask=mask, **kwargs), } return outputs - def unconditional_embedding(self, batch_size): + def unconditional_embedding(self, batch_size, **kwargs): outputs = { - 'main': self.main_image_encoder.unconditional_embedding(batch_size), + 'main': self.main_image_encoder.unconditional_embedding(batch_size, **kwargs), } return outputs diff --git a/hy3dgen/shapegen/models/denoisers/__init__.py b/hy3dgen/shapegen/models/denoisers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..72609333e27d68d377905ba9e67655de7021c31b --- /dev/null +++ b/hy3dgen/shapegen/models/denoisers/__init__.py @@ -0,0 +1,15 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations.
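get_1d_sincos_pos_embed_from_grid supplies the per-view positional code that DinoImageEncoderMV adds to every patch token of each view. A quick shape check; the hidden size 1024 is only a stand-in for the encoder's actual width:

    import numpy as np
    from hy3dgen.shapegen.models.conditioner import get_1d_sincos_pos_embed_from_grid

    pos = np.arange(4, dtype=np.float32)                # one slot per view, as in DinoImageEncoderMV
    emb = get_1d_sincos_pos_embed_from_grid(1024, pos)  # (4, 1024): sin half, then cos half
    assert emb.shape == (4, 1024)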
+ +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +from .hunyuan3ddit import Hunyuan3DDiT diff --git a/hy3dgen/shapegen/models/hunyuan3ddit.py b/hy3dgen/shapegen/models/denoisers/hunyuan3ddit.py old mode 100644 new mode 100755 similarity index 91% rename from hy3dgen/shapegen/models/hunyuan3ddit.py rename to hy3dgen/shapegen/models/denoisers/hunyuan3ddit.py index d1c778666890cb13538eba15460cf0c05c7f9130..7873f1660b40b860b3f87cd0b799ce29890f8620 --- a/hy3dgen/shapegen/models/hunyuan3ddit.py +++ b/hy3dgen/shapegen/models/denoisers/hunyuan3ddit.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -23,6 +13,7 @@ # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
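The guidance_embed flag added to Hunyuan3DDiT below folds a classifier-free-guidance scale into the conditioning vector for guidance-distilled checkpoints, reusing the sinusoidal timestep embedding. A sketch of just that step (the 5.0 scale is illustrative):

    import torch
    from hy3dgen.shapegen.models.denoisers.hunyuan3ddit import timestep_embedding

    guidance = torch.full((1,), 5.0)                 # CFG scale, one entry per batch element
    g_vec = timestep_embedding(guidance, 256, 1000)  # mirrors timestep_embedding(t, 256, self.time_factor)
    # in Hunyuan3DDiT.forward: vec = time_in(...) + guidance_in(g_vec) when guidance_embed=True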
import math +import os from dataclasses import dataclass from typing import List, Tuple, Optional @@ -30,9 +21,17 @@ import torch from einops import rearrange from torch import Tensor, nn +scaled_dot_product_attention = nn.functional.scaled_dot_product_attention +if os.environ.get('USE_SAGEATTN', '0') == '1': + try: + from sageattention import sageattn + except ImportError: + raise ImportError('Please install the package "sageattention" to use this USE_SAGEATTN.') + scaled_dot_product_attention = sageattn + def attention(q: Tensor, k: Tensor, v: Tensor, **kwargs) -> Tensor: - x = torch.nn.functional.scaled_dot_product_attention(q, k, v) + x = scaled_dot_product_attention(q, k, v) x = rearrange(x, "B H L D -> B L (H D)") return x @@ -61,6 +60,15 @@ def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 10 return embedding +class GELU(nn.Module): + def __init__(self, approximate='tanh'): + super().__init__() + self.approximate = approximate + + def forward(self, x: Tensor) -> Tensor: + return nn.functional.gelu(x.contiguous(), approximate=self.approximate) + + class MLPEmbedder(nn.Module): def __init__(self, in_dim: int, hidden_dim: int): super().__init__() @@ -163,7 +171,7 @@ class DoubleStreamBlock(nn.Module): self.img_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) self.img_mlp = nn.Sequential( nn.Linear(hidden_size, mlp_hidden_dim, bias=True), - nn.GELU(approximate="tanh"), + GELU(approximate="tanh"), nn.Linear(mlp_hidden_dim, hidden_size, bias=True), ) @@ -174,7 +182,7 @@ class DoubleStreamBlock(nn.Module): self.txt_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) self.txt_mlp = nn.Sequential( nn.Linear(hidden_size, mlp_hidden_dim, bias=True), - nn.GELU(approximate="tanh"), + GELU(approximate="tanh"), nn.Linear(mlp_hidden_dim, hidden_size, bias=True), ) @@ -240,7 +248,7 @@ class SingleStreamBlock(nn.Module): self.hidden_size = hidden_size self.pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) - self.mlp_act = nn.GELU(approximate="tanh") + self.mlp_act = GELU(approximate="tanh") self.modulation = Modulation(hidden_size, double=False) def forward(self, x: Tensor, vec: Tensor, pe: Tensor) -> Tensor: @@ -287,6 +295,7 @@ class Hunyuan3DDiT(nn.Module): theta: int = 10_000, qkv_bias: bool = True, time_factor: float = 1000, + guidance_embed: bool = False, ckpt_path: Optional[str] = None, **kwargs, ): @@ -303,6 +312,7 @@ class Hunyuan3DDiT(nn.Module): self.qkv_bias = qkv_bias self.time_factor = time_factor self.out_channels = self.in_channels + self.guidance_embed = guidance_embed if hidden_size % num_heads != 0: raise ValueError( @@ -316,6 +326,9 @@ class Hunyuan3DDiT(nn.Module): self.latent_in = nn.Linear(self.in_channels, self.hidden_size, bias=True) self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) self.cond_in = nn.Linear(context_in_dim, self.hidden_size) + self.guidance_in = ( + MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) if guidance_embed else nn.Identity() + ) self.double_blocks = nn.ModuleList( [ @@ -374,7 +387,14 @@ class Hunyuan3DDiT(nn.Module): ) -> Tensor: cond = contexts['main'] latent = self.latent_in(x) + vec = self.time_in(timestep_embedding(t, 256, self.time_factor).to(dtype=latent.dtype)) + if self.guidance_embed: + guidance = kwargs.get('guidance', None) + if guidance is None: + raise ValueError("Didn't get guidance strength for guidance distilled model.") + vec = vec + self.guidance_in(timestep_embedding(guidance, 256, self.time_factor)) + cond = 
self.cond_in(cond) pe = None diff --git a/hy3dgen/shapegen/pipelines.py b/hy3dgen/shapegen/pipelines.py old mode 100644 new mode 100755 index 5d535ea2b222e49a659c0fbbbfbe23f50cdb947e..ea265fa026ef386f7c41117fa8711a08b86acf80 --- a/hy3dgen/shapegen/pipelines.py +++ b/hy3dgen/shapegen/pipelines.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -25,7 +15,6 @@ import copy import importlib import inspect -import logging import os from typing import List, Optional, Union @@ -35,9 +24,12 @@ import trimesh import yaml from PIL import Image from diffusers.utils.torch_utils import randn_tensor +from diffusers.utils.import_utils import is_accelerate_version, is_accelerate_available from tqdm import tqdm -logger = logging.getLogger(__name__) +from .models.autoencoders import ShapeVAE +from .models.autoencoders import SurfaceExtractors +from .utils import logger, synchronize_timer, smart_load_model def retrieve_timesteps( @@ -99,6 +91,7 @@ def retrieve_timesteps( return timesteps, num_inference_steps +@synchronize_timer('Export to trimesh') def export_to_trimesh(mesh_output): if isinstance(mesh_output, list): outputs = [] @@ -135,7 +128,11 @@ def instantiate_from_config(config, **kwargs): class Hunyuan3DDiTPipeline: + model_cpu_offload_seq = "conditioner->model->vae" + _exclude_from_cpu_offload = [] + @classmethod + @synchronize_timer('Hunyuan3DDiTPipeline Model Loading') def from_single_file( cls, ckpt_path, @@ -168,7 +165,7 @@ class Hunyuan3DDiTPipeline: ckpt[model_name] = {} ckpt[model_name][new_key] = value else: - ckpt = torch.load(ckpt_path, map_location='cpu') + ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True) # load model model = instantiate_from_config(config['model']) model.load_state_dict(ckpt['model']) @@ -186,7 +183,6 @@ class Hunyuan3DDiTPipeline: scheduler=scheduler, conditioner=conditioner, image_processor=image_processor, - scheduler_cfg=config['scheduler'], device=device, dtype=dtype, ) @@ -200,37 +196,27 @@ class Hunyuan3DDiTPipeline: def from_pretrained( cls, model_path, - ckpt_name='model.ckpt', - config_name='config.yaml', device='cuda', dtype=torch.float16, - use_safetensors=None, + use_safetensors=True, + variant='fp16', + subfolder='hunyuan3d-dit-v2-0', **kwargs, ): - original_model_path = model_path - if not os.path.exists(model_path): - # try local path - base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen') - model_path = os.path.expanduser(os.path.join(base_dir, model_path, 'hunyuan3d-dit-v2-0')) - if not os.path.exists(model_path): - - try: - import huggingface_hub - path = snapshot_download( - repo_id=original_model_path, - allow_patterns=[f"hunyuan3d-dit-v2-0/*"], # 关键修改:模式匹配子文件夹 - ) - model_path = os.path.join(path, 'hunyuan3d-dit-v2-0') - except ImportError: - logger.warning( - "You 
need to install HuggingFace Hub to load models from the hub." - ) - raise RuntimeError(f"Model path {model_path} not found") - if not os.path.exists(model_path): - raise FileNotFoundError(f"Model path {original_model_path} not found") - - config_path = os.path.join(model_path, config_name) - ckpt_path = os.path.join(model_path, ckpt_name) + kwargs['from_pretrained_kwargs'] = dict( + model_path=model_path, + subfolder=subfolder, + use_safetensors=use_safetensors, + variant=variant, + dtype=dtype, + device=device, + ) + config_path, ckpt_path = smart_load_model( + model_path, + subfolder=subfolder, + use_safetensors=use_safetensors, + variant=variant + ) return cls.from_single_file( ckpt_path, config_path, @@ -257,27 +243,185 @@ class Hunyuan3DDiTPipeline: self.conditioner = conditioner self.image_processor = image_processor self.kwargs = kwargs - self.to(device, dtype) + def compile(self): + self.vae = torch.compile(self.vae) + self.model = torch.compile(self.model) + self.conditioner = torch.compile(self.conditioner) + + def enable_flashvdm( + self, + enabled: bool = True, + adaptive_kv_selection=True, + topk_mode='mean', + mc_algo='dmc', + replace_vae=True, + ): + if enabled: + model_path = self.kwargs['from_pretrained_kwargs']['model_path'] + turbo_vae_mapping = { + 'Hunyuan3D-2': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0-turbo'), + 'Hunyuan3D-2mv': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0-turbo'), + 'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini-turbo'), + } + model_name = model_path.split('/')[-1] + if replace_vae and model_name in turbo_vae_mapping: + model_path, subfolder = turbo_vae_mapping[model_name] + self.vae = ShapeVAE.from_pretrained( + model_path, subfolder=subfolder, + use_safetensors=self.kwargs['from_pretrained_kwargs']['use_safetensors'], + device=self.device, + ) + self.vae.enable_flashvdm_decoder( + enabled=enabled, + adaptive_kv_selection=adaptive_kv_selection, + topk_mode=topk_mode, + mc_algo=mc_algo + ) + else: + model_path = self.kwargs['from_pretrained_kwargs']['model_path'] + vae_mapping = { + 'Hunyuan3D-2': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0'), + 'Hunyuan3D-2mv': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0'), + 'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini'), + } + model_name = model_path.split('/')[-1] + if model_name in vae_mapping: + model_path, subfolder = vae_mapping[model_name] + self.vae = ShapeVAE.from_pretrained(model_path, subfolder=subfolder) + self.vae.enable_flashvdm_decoder(enabled=False) + def to(self, device=None, dtype=None): - if device is not None: - self.device = torch.device(device) - self.vae.to(device) - self.model.to(device) - self.conditioner.to(device) if dtype is not None: self.dtype = dtype self.vae.to(dtype=dtype) self.model.to(dtype=dtype) self.conditioner.to(dtype=dtype) + if device is not None: + self.device = torch.device(device) + self.vae.to(device) + self.model.to(device) + self.conditioner.to(device) + + @property + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. After calling + [`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from + Accelerate's module hooks. 
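Putting the new loading path and FlashVDM switch together, a hedged end-to-end sketch: the repo id, subfolder, and example image are illustrative, and the __call__ signature (image=...) is assumed from the repo's public usage rather than shown in this hunk:

    import torch
    from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline

    pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
        'tencent/Hunyuan3D-2',                 # resolved through smart_load_model
        subfolder='hunyuan3d-dit-v2-0', use_safetensors=True, variant='fp16',
    )
    pipeline.enable_flashvdm(topk_mode='mean', mc_algo='dmc')  # also swaps in the matching turbo VAE
    mesh = pipeline(image='assets/example_images/004.png')[0]  # trimesh output via export_to_trimesh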
+ """ + for name, model in self.components.items(): + if not isinstance(model, torch.nn.Module) or name in self._exclude_from_cpu_offload: + continue + + if not hasattr(model, "_hf_hook"): + return self.device + for module in model.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + + def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"): + r""" + Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared + to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward` + method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with + `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`. + + Arguments: + gpu_id (`int`, *optional*): + The ID of the accelerator that shall be used in inference. If not specified, it will default to 0. + device (`torch.Device` or `str`, *optional*, defaults to "cuda"): + The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will + default to "cuda". + """ + if self.model_cpu_offload_seq is None: + raise ValueError( + "Model CPU offload cannot be enabled because no `model_cpu_offload_seq` class attribute is set." + ) + + if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): + from accelerate import cpu_offload_with_hook + else: + raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") + + torch_device = torch.device(device) + device_index = torch_device.index + + if gpu_id is not None and device_index is not None: + raise ValueError( + f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}" + f"Cannot pass both. 
Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}" + ) + + # _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0 + self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0) + + device_type = torch_device.type + device = torch.device(f"{device_type}:{self._offload_gpu_id}") + + if self.device.type != "cpu": + self.to("cpu") + device_mod = getattr(torch, self.device.type, None) + if hasattr(device_mod, "empty_cache") and device_mod.is_available(): + device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist) + + all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)} + + self._all_hooks = [] + hook = None + for model_str in self.model_cpu_offload_seq.split("->"): + model = all_model_components.pop(model_str, None) + if not isinstance(model, torch.nn.Module): + continue - def encode_cond(self, image, mask, do_classifier_free_guidance, dual_guidance): + _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook) + self._all_hooks.append(hook) + + # CPU offload models that are not in the seq chain unless they are explicitly excluded + # these models will stay on CPU until maybe_free_model_hooks is called + # some models cannot be in the seq chain because they are iteratively called, such as controlnet + for name, model in all_model_components.items(): + if not isinstance(model, torch.nn.Module): + continue + + if name in self._exclude_from_cpu_offload: + model.to(device) + else: + _, hook = cpu_offload_with_hook(model, device) + self._all_hooks.append(hook) + + def maybe_free_model_hooks(self): + r""" + Function that offloads all components, removes all model hooks that were added when using + `enable_model_cpu_offload` and then applies them again. In case the model has not been offloaded this function + is a no-op. Make sure to add this function to the end of the `__call__` function of your pipeline so that it + functions correctly when applying enable_model_cpu_offload. 
+ """ + if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0: + # `enable_model_cpu_offload` has not be called, so silently do nothing + return + + for hook in self._all_hooks: + # offload model and remove hook from model + hook.offload() + hook.remove() + + # make sure the model is in the same state as before calling it + self.enable_model_cpu_offload() + + @synchronize_timer('Encode cond') + def encode_cond(self, image, additional_cond_inputs, do_classifier_free_guidance, dual_guidance): bsz = image.shape[0] - cond = self.conditioner(image=image, mask=mask) + cond = self.conditioner(image=image, **additional_cond_inputs) if do_classifier_free_guidance: - un_cond = self.conditioner.unconditional_embedding(bsz) + un_cond = self.conditioner.unconditional_embedding(bsz, **additional_cond_inputs) if dual_guidance: un_cond_drop_main = copy.deepcopy(un_cond) @@ -293,8 +437,6 @@ class Hunyuan3DDiTPipeline: cond = cat_recursive(cond, un_cond_drop_main, un_cond) else: - un_cond = self.conditioner.unconditional_embedding(bsz) - def cat_recursive(a, b): if isinstance(a, torch.Tensor): return torch.cat([a, b], dim=0).to(self.dtype) @@ -340,25 +482,27 @@ class Hunyuan3DDiTPipeline: latents = latents * getattr(self.scheduler, 'init_noise_sigma', 1.0) return latents - def prepare_image(self, image): + def prepare_image(self, image) -> dict: if isinstance(image, str) and not os.path.exists(image): raise FileNotFoundError(f"Couldn't find image at path {image}") if not isinstance(image, list): image = [image] - image_pts = [] - mask_pts = [] + + outputs = [] for img in image: - image_pt, mask_pt = self.image_processor(img, return_mask=True) - image_pts.append(image_pt) - mask_pts.append(mask_pt) + output = self.image_processor(img) + outputs.append(output) - image_pts = torch.cat(image_pts, dim=0).to(self.device, dtype=self.dtype) - if mask_pts[0] is not None: - mask_pts = torch.cat(mask_pts, dim=0).to(self.device, dtype=self.dtype) - else: - mask_pts = None - return image_pts, mask_pts + cond_input = {k: [] for k in outputs[0].keys()} + for output in outputs: + for key, value in output.items(): + cond_input[key].append(value) + for key, value in cond_input.items(): + if isinstance(value[0], torch.Tensor): + cond_input[key] = torch.cat(value, dim=0) + + return cond_input def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): """ @@ -388,6 +532,17 @@ class Hunyuan3DDiTPipeline: assert emb.shape == (w.shape[0], embedding_dim) return emb + def set_surface_extractor(self, mc_algo): + if mc_algo is None: + return + logger.info('The parameters `mc_algo` is deprecated, and will be removed in future versions.\n' + 'Please use: \n' + 'from hy3dgen.shapegen.models.autoencoders import SurfaceExtractors\n' + 'pipeline.vae.surface_extractor = SurfaceExtractors[mc_algo]() instead\n') + if mc_algo not in SurfaceExtractors.keys(): + raise ValueError(f"Unknown mc_algo {mc_algo}") + self.vae.surface_extractor = SurfaceExtractors[mc_algo]() + @torch.no_grad() def __call__( self, @@ -404,7 +559,7 @@ class Hunyuan3DDiTPipeline: octree_resolution=384, mc_level=-1 / 512, num_chunks=8000, - mc_algo='mc', + mc_algo=None, output_type: Optional[str] = "trimesh", enable_pbar=True, **kwargs, @@ -412,69 +567,72 @@ class Hunyuan3DDiTPipeline: callback = kwargs.pop("callback", None) callback_steps = kwargs.pop("callback_steps", None) + self.set_surface_extractor(mc_algo) + device = self.device dtype = self.dtype do_classifier_free_guidance = guidance_scale >= 0 and \ getattr(self.model, 
'guidance_cond_proj_dim', None) is None dual_guidance = dual_guidance_scale >= 0 and dual_guidance - image, mask = self.prepare_image(image) - cond = self.encode_cond(image=image, - mask=mask, - do_classifier_free_guidance=do_classifier_free_guidance, - dual_guidance=dual_guidance) + cond_inputs = self.prepare_image(image) + image = cond_inputs.pop('image') + cond = self.encode_cond( + image=image, + additional_cond_inputs=cond_inputs, + do_classifier_free_guidance=do_classifier_free_guidance, + dual_guidance=False, + ) batch_size = image.shape[0] t_dtype = torch.long - scheduler = instantiate_from_config(self.kwargs['scheduler_cfg']) timesteps, num_inference_steps = retrieve_timesteps( - scheduler, num_inference_steps, device, timesteps, sigmas - ) + self.scheduler, num_inference_steps, device, timesteps, sigmas) latents = self.prepare_latents(batch_size, dtype, device, generator) extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) guidance_cond = None if getattr(self.model, 'guidance_cond_proj_dim', None) is not None: - print('Using lcm guidance scale') + logger.info('Using lcm guidance scale') guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(batch_size) guidance_cond = self.get_guidance_scale_embedding( guidance_scale_tensor, embedding_dim=self.model.guidance_cond_proj_dim ).to(device=device, dtype=latents.dtype) - - for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:", leave=False)): - # expand the latents if we are doing classifier free guidance - if do_classifier_free_guidance: - latent_model_input = torch.cat([latents] * (3 if dual_guidance else 2)) - else: - latent_model_input = latents - latent_model_input = scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - timestep_tensor = torch.tensor([t], dtype=t_dtype, device=device) - timestep_tensor = timestep_tensor.expand(latent_model_input.shape[0]) - noise_pred = self.model(latent_model_input, timestep_tensor, cond, guidance_cond=guidance_cond) - - # no drop, drop clip, all drop - if do_classifier_free_guidance: - if dual_guidance: - noise_pred_clip, noise_pred_dino, noise_pred_uncond = noise_pred.chunk(3) - noise_pred = ( - noise_pred_uncond - + guidance_scale * (noise_pred_clip - noise_pred_dino) - + dual_guidance_scale * (noise_pred_dino - noise_pred_uncond) - ) + with synchronize_timer('Diffusion Sampling'): + for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:", leave=False)): + # expand the latents if we are doing classifier free guidance + if do_classifier_free_guidance: + latent_model_input = torch.cat([latents] * (3 if dual_guidance else 2)) else: - noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - outputs = scheduler.step(noise_pred, t, latents, **extra_step_kwargs) - latents = outputs.prev_sample - - if callback is not None and i % callback_steps == 0: - step_idx = i // getattr(scheduler, "order", 1) - callback(step_idx, t, outputs) + latent_model_input = latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + timestep_tensor = torch.tensor([t], dtype=t_dtype, device=device) + timestep_tensor = timestep_tensor.expand(latent_model_input.shape[0]) + noise_pred = self.model(latent_model_input, timestep_tensor, cond, guidance_cond=guidance_cond) + + # no drop, drop clip, all 
drop + if do_classifier_free_guidance: + if dual_guidance: + noise_pred_clip, noise_pred_dino, noise_pred_uncond = noise_pred.chunk(3) + noise_pred = ( + noise_pred_uncond + + guidance_scale * (noise_pred_clip - noise_pred_dino) + + dual_guidance_scale * (noise_pred_dino - noise_pred_uncond) + ) + else: + noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + outputs = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs) + latents = outputs.prev_sample + + if callback is not None and i % callback_steps == 0: + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, outputs) return self._export( latents, @@ -482,7 +640,17 @@ class Hunyuan3DDiTPipeline: box_v, mc_level, num_chunks, octree_resolution, mc_algo, ) - def _export(self, latents, output_type, box_v, mc_level, num_chunks, octree_resolution, mc_algo): + def _export( + self, + latents, + output_type='trimesh', + box_v=1.01, + mc_level=0.0, + num_chunks=20000, + octree_resolution=256, + mc_algo='mc', + enable_pbar=True + ): if not output_type == "latent": latents = 1. / self.vae.scale_factor * latents latents = self.vae(latents) @@ -493,6 +661,7 @@ class Hunyuan3DDiTPipeline: num_chunks=num_chunks, octree_resolution=octree_resolution, mc_algo=mc_algo, + enable_pbar=enable_pbar, ) else: outputs = latents @@ -505,20 +674,20 @@ class Hunyuan3DDiTPipeline: class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): - @torch.no_grad() + @torch.inference_mode() def __call__( self, - image: Union[str, List[str], Image.Image] = None, + image: Union[str, List[str], Image.Image, dict, List[dict]] = None, num_inference_steps: int = 50, timesteps: List[int] = None, sigmas: List[float] = None, eta: float = 0.0, - guidance_scale: float = 7.5, + guidance_scale: float = 5.0, generator=None, box_v=1.01, octree_resolution=384, mc_level=0.0, - mc_algo='mc', + mc_algo=None, num_chunks=8000, output_type: Optional[str] = "trimesh", enable_pbar=True, @@ -527,6 +696,8 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): callback = kwargs.pop("callback", None) callback_steps = kwargs.pop("callback_steps", None) + self.set_surface_extractor(mc_algo) + device = self.device dtype = self.dtype do_classifier_free_guidance = guidance_scale >= 0 and not ( @@ -534,10 +705,11 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): self.model.guidance_embed is True ) - image, mask = self.prepare_image(image) + cond_inputs = self.prepare_image(image) + image = cond_inputs.pop('image') cond = self.encode_cond( image=image, - mask=mask, + additional_cond_inputs=cond_inputs, do_classifier_free_guidance=do_classifier_free_guidance, dual_guidance=False, ) @@ -546,9 +718,8 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): # 5. Prepare timesteps # NOTE: this is slightly different from common usage, we start from 0. 
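A quick aside on the NOTE above, which refers to the sigma schedule built on the line that follows: unlike typical diffusion samplers, the flow-matching sampler starts its sigmas at 0 and rises to 1. A minimal sketch, assuming only numpy and an illustrative `num_train_timesteps` of 1000 (the real value comes from the scheduler config):

```python
import numpy as np

num_inference_steps = 5
sigmas = np.linspace(0, 1, num_inference_steps)  # starts at 0, ends at 1
print(sigmas)          # [0.   0.25 0.5  0.75 1.  ]

# The scheduler turns sigmas into the timesteps handed to the model;
# 1000 is an assumed num_train_timesteps, for illustration only.
print(sigmas * 1000)   # [   0.  250.  500.  750. 1000.]
```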
sigmas = np.linspace(0, 1, num_inference_steps) if sigmas is None else sigmas - scheduler = instantiate_from_config(self.kwargs['scheduler_cfg']) timesteps, num_inference_steps = retrieve_timesteps( - scheduler, + self.scheduler, num_inference_steps, device, sigmas=sigmas, @@ -559,34 +730,36 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline): if hasattr(self.model, 'guidance_embed') and \ self.model.guidance_embed is True: guidance = torch.tensor([guidance_scale] * batch_size, device=device, dtype=dtype) + # logger.info(f'Using guidance embed with scale {guidance_scale}') - for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:")): - # expand the latents if we are doing classifier free guidance - if do_classifier_free_guidance: - latent_model_input = torch.cat([latents] * 2) - else: - latent_model_input = latents + with synchronize_timer('Diffusion Sampling'): + for i, t in enumerate(tqdm(timesteps, disable=not enable_pbar, desc="Diffusion Sampling:")): + # expand the latents if we are doing classifier free guidance + if do_classifier_free_guidance: + latent_model_input = torch.cat([latents] * 2) + else: + latent_model_input = latents - # NOTE: we assume model get timesteps ranged from 0 to 1 - timestep = t.expand(latent_model_input.shape[0]).to( - latents.dtype) / scheduler.config.num_train_timesteps - noise_pred = self.model(latent_model_input, timestep, cond, guidance=guidance) + # NOTE: we assume model get timesteps ranged from 0 to 1 + timestep = t.expand(latent_model_input.shape[0]).to( + latents.dtype) / self.scheduler.config.num_train_timesteps + noise_pred = self.model(latent_model_input, timestep, cond, guidance=guidance) - if do_classifier_free_guidance: - noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) + if do_classifier_free_guidance: + noise_pred_cond, noise_pred_uncond = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond) - # compute the previous noisy sample x_t -> x_t-1 - outputs = scheduler.step(noise_pred, t, latents) - latents = outputs.prev_sample + # compute the previous noisy sample x_t -> x_t-1 + outputs = self.scheduler.step(noise_pred, t, latents) + latents = outputs.prev_sample - if callback is not None and i % callback_steps == 0: - step_idx = i // getattr(scheduler, "order", 1) - callback(step_idx, t, outputs) + if callback is not None and i % callback_steps == 0: + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, outputs) return self._export( latents, output_type, box_v, mc_level, num_chunks, octree_resolution, mc_algo, + enable_pbar=enable_pbar, ) - diff --git a/hy3dgen/shapegen/postprocessors.py b/hy3dgen/shapegen/postprocessors.py old mode 100644 new mode 100755 index 1843817b02a5355cf0a5e40fae2e476bee61a326..d258369e2b9116090ade3c955d57215d1481138e --- a/hy3dgen/shapegen/postprocessors.py +++ b/hy3dgen/shapegen/postprocessors.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. 
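With the flow-matching pipeline now complete, a hedged end-to-end sketch of how the rewritten loader and sampler fit together; the repo id, import path, and example image are illustrative, and the defaults follow the new signatures above:

```python
import torch
from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline  # import path assumed

# from_pretrained now resolves config/checkpoint via smart_load_model
# and defaults to the fp16 safetensors variant.
pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
    'tencent/Hunyuan3D-2',
    device='cuda',
    dtype=torch.float16,
)
mesh = pipeline(
    image='assets/example_images/004.png',  # single-view input, path illustrative
    num_inference_steps=50,
    guidance_scale=5.0,                     # the new default
)[0]
mesh.export('demo.glb')
```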
-# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -22,13 +12,17 @@ # fine-tuning enabling code and other elements of the foregoing made publicly available # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +import os import tempfile from typing import Union +import numpy as np import pymeshlab +import torch import trimesh -from .models.vae import Latent2MeshOutput +from .models.autoencoders import Latent2MeshOutput +from .utils import synchronize_timer def load_mesh(path): @@ -41,6 +35,9 @@ def load_mesh(path): def reduce_face(mesh: pymeshlab.MeshSet, max_facenum: int = 200000): + if max_facenum > mesh.current_mesh().face_number(): + return mesh + mesh.apply_filter( "meshing_decimation_quadric_edge_collapse", targetfacenum=max_facenum, @@ -63,7 +60,7 @@ def remove_floater(mesh: pymeshlab.MeshSet): def pymeshlab2trimesh(mesh: pymeshlab.MeshSet): - with tempfile.NamedTemporaryFile(suffix='.ply', delete=True) as temp_file: + with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: mesh.save_current_mesh(temp_file.name) mesh = trimesh.load(temp_file.name) # 检查加载的对象类型 @@ -77,7 +74,7 @@ def pymeshlab2trimesh(mesh: pymeshlab.MeshSet): def trimesh2pymeshlab(mesh: trimesh.Trimesh): - with tempfile.NamedTemporaryFile(suffix='.ply', delete=True) as temp_file: + with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: if isinstance(mesh, trimesh.scene.Scene): for idx, obj in enumerate(mesh.geometry.values()): if idx == 0: @@ -119,6 +116,7 @@ def import_mesh(mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutpu class FaceReducer: + @synchronize_timer('FaceReducer') def __call__( self, mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], @@ -131,6 +129,7 @@ class FaceReducer: class FloaterRemover: + @synchronize_timer('FloaterRemover') def __call__( self, mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], @@ -142,16 +141,62 @@ class FloaterRemover: class DegenerateFaceRemover: + @synchronize_timer('DegenerateFaceRemover') def __call__( self, mesh: Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput, str], ) -> Union[pymeshlab.MeshSet, trimesh.Trimesh, Latent2MeshOutput]: ms = import_mesh(mesh) - with tempfile.NamedTemporaryFile(suffix='.ply', delete=True) as temp_file: + with tempfile.NamedTemporaryFile(suffix='.ply', delete=False) as temp_file: ms.save_current_mesh(temp_file.name) ms = pymeshlab.MeshSet() ms.load_new_mesh(temp_file.name) mesh = export_mesh(mesh, ms) return mesh + + +def mesh_normalize(mesh): + """ + Normalize mesh vertices to sphere + """ + scale_factor = 1.2 + vtx_pos = np.asarray(mesh.vertices) + max_bb = (vtx_pos - 0).max(0)[0] + min_bb = (vtx_pos - 0).min(0)[0] + + center = (max_bb + min_bb) / 2 + + scale = torch.norm(torch.tensor(vtx_pos - center, dtype=torch.float32), dim=1).max() * 2.0 + + vtx_pos = (vtx_pos - center) * (scale_factor / float(scale)) + mesh.vertices = vtx_pos + + return mesh + + +class MeshSimplifier: + def __init__(self, executable: str = None): + if executable is None: + CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) + executable = os.path.join(CURRENT_DIR, 
"mesh_simplifier.bin") + self.executable = executable + + @synchronize_timer('MeshSimplifier') + def __call__( + self, + mesh: Union[trimesh.Trimesh], + ) -> Union[trimesh.Trimesh]: + with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_input: + with tempfile.NamedTemporaryFile(suffix='.obj', delete=False) as temp_output: + mesh.export(temp_input.name) + os.system(f'{self.executable} {temp_input.name} {temp_output.name}') + ms = trimesh.load(temp_output.name, process=False) + if isinstance(ms, trimesh.Scene): + combined_mesh = trimesh.Trimesh() + for geom in ms.geometry.values(): + combined_mesh = trimesh.util.concatenate([combined_mesh, geom]) + ms = combined_mesh + ms = mesh_normalize(ms) + return ms diff --git a/hy3dgen/shapegen/preprocessors.py b/hy3dgen/shapegen/preprocessors.py old mode 100644 new mode 100755 index 2bdaff2d16cc0844d8d23c886d35c2f4e7286ff7..8a9cb9ea1591363fb77f4e02351a0f945e3bc1ab --- a/hy3dgen/shapegen/preprocessors.py +++ b/hy3dgen/shapegen/preprocessors.py @@ -1,12 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -87,7 +78,7 @@ class ImageProcessorV2: interpolation=cv2.INTER_AREA) bg = np.ones((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255 - # bg = np.zeros((result.shape[0], result.shape[1], 3), dtype=np.uint8) * 255 + mask = result[..., 3:].astype(np.float32) / 255 result = result[..., :3] * mask + bg * (1 - mask) @@ -96,15 +87,13 @@ class ImageProcessorV2: mask = mask.clip(0, 255).astype(np.uint8) return result, mask - def __call__(self, image, border_ratio=0.15, to_tensor=True, return_mask=False, **kwargs): - if self.border_ratio is not None: - border_ratio = self.border_ratio - print(f"Using border_ratio from init: {border_ratio}") + def load_image(self, image, border_ratio=0.15, to_tensor=True): if isinstance(image, str): image = cv2.imread(image, cv2.IMREAD_UNCHANGED) image, mask = self.recenter(image, border_ratio=border_ratio) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) elif isinstance(image, Image.Image): + image = image.convert("RGBA") image = np.asarray(image) image, mask = self.recenter(image, border_ratio=border_ratio) @@ -115,13 +104,64 @@ class ImageProcessorV2: if to_tensor: image = array_to_tensor(image) mask = array_to_tensor(mask) - if return_mask: - return image, mask - return image + return image, mask + + def __call__(self, image, border_ratio=0.15, to_tensor=True, **kwargs): + if self.border_ratio is not None: + border_ratio = self.border_ratio + image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor) + outputs = { + 'image': image, + 'mask': mask + } + return outputs + + +class MVImageProcessorV2(ImageProcessorV2): + """ + view order: front, front clockwise 90, back, front clockwise 270 + """ + return_view_idx = True + + def 
__init__(self, size=512, border_ratio=None): + super().__init__(size, border_ratio) + self.view2idx = { + 'front': 0, + 'left': 1, + 'back': 2, + 'right': 3 + } + + def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kwargs): + if self.border_ratio is not None: + border_ratio = self.border_ratio + + images = [] + masks = [] + view_idxs = [] + for idx, (view_tag, image) in enumerate(image_dict.items()): + view_idxs.append(self.view2idx[view_tag]) + image, mask = self.load_image(image, border_ratio=border_ratio, to_tensor=to_tensor) + images.append(image) + masks.append(mask) + + zipped_lists = zip(view_idxs, images, masks) + sorted_zipped_lists = sorted(zipped_lists) + view_idxs, images, masks = zip(*sorted_zipped_lists) + + image = torch.cat(images, 0).unsqueeze(0) + mask = torch.cat(masks, 0).unsqueeze(0) + outputs = { + 'image': image, + 'mask': mask, + 'view_idxs': view_idxs + } + return outputs IMAGE_PROCESSORS = { "v2": ImageProcessorV2, + 'mv_v2': MVImageProcessorV2, } DEFAULT_IMAGEPROCESSOR = 'v2' diff --git a/hy3dgen/shapegen/schedulers.py b/hy3dgen/shapegen/schedulers.py old mode 100644 new mode 100755 index 0069f5cd49c5095930b588f01129a77f172171a7..6ef364360e13afae03c39e49f4fa6da963c970d4 --- a/hy3dgen/shapegen/schedulers.py +++ b/hy3dgen/shapegen/schedulers.py @@ -12,6 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the repsective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
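A minimal usage sketch for the new `MVImageProcessorV2` above. The file names are hypothetical, and views may be passed in any order, since the processor re-sorts them by its `view2idx` mapping (front=0, left=1, back=2, right=3) before concatenating:

```python
from PIL import Image
from hy3dgen.shapegen.preprocessors import MVImageProcessorV2

processor = MVImageProcessorV2(size=512)
outputs = processor({
    'back': Image.open('back.png'),    # hypothetical paths
    'front': Image.open('front.png'),
    'left': Image.open('left.png'),
})
# Views come back sorted by index regardless of dict order.
print(outputs['view_idxs'])  # (0, 1, 2)
# 'image' and 'mask' are batched tensors; their exact layout depends on
# array_to_tensor, which is defined elsewhere in this module.
print(type(outputs['image']))
```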
+ import math from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -305,3 +319,162 @@ class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin): def __len__(self): return self.config.num_train_timesteps + + +@dataclass +class ConsistencyFlowMatchEulerDiscreteSchedulerOutput(BaseOutput): + prev_sample: torch.FloatTensor + pred_original_sample: torch.FloatTensor + + +class ConsistencyFlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin): + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + pcm_timesteps: int = 50, + ): + sigmas = np.linspace(0, 1, num_train_timesteps) + step_ratio = num_train_timesteps // pcm_timesteps + + euler_timesteps = (np.arange(1, pcm_timesteps) * step_ratio).round().astype(np.int64) - 1 + euler_timesteps = np.asarray([0] + euler_timesteps.tolist()) + + self.euler_timesteps = euler_timesteps + self.sigmas = sigmas[self.euler_timesteps] + self.sigmas = torch.from_numpy((self.sigmas.copy())) + self.timesteps = self.sigmas * num_train_timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def _sigma_to_t(self, sigma): + return sigma * self.config.num_train_timesteps + + def set_timesteps( + self, + num_inference_steps: int = None, + device: Union[str, torch.device] = None, + sigmas: Optional[List[float]] = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. 
+ """ + self.num_inference_steps = num_inference_steps if num_inference_steps is not None else len(sigmas) + inference_indices = np.linspace( + 0, self.config.pcm_timesteps, num=self.num_inference_steps, endpoint=False + ) + inference_indices = np.floor(inference_indices).astype(np.int64) + inference_indices = torch.from_numpy(inference_indices).long() + + self.sigmas_ = self.sigmas[inference_indices] + timesteps = self.sigmas_ * self.config.num_train_timesteps + self.timesteps = timesteps.to(device=device) + self.sigmas_ = torch.cat( + [self.sigmas_, torch.ones(1, device=self.sigmas_.device)] + ) + + self._step_index = None + self._begin_index = None + + def index_for_timestep(self, timestep, schedule_timesteps=None): + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + def _init_step_index(self, timestep): + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.FloatTensor, + timestep: Union[float, torch.FloatTensor], + sample: torch.FloatTensor, + generator: Optional[torch.Generator] = None, + return_dict: bool = True, + ) -> Union[ConsistencyFlowMatchEulerDiscreteSchedulerOutput, Tuple]: + if ( + isinstance(timestep, int) + or isinstance(timestep, torch.IntTensor) + or isinstance(timestep, torch.LongTensor) + ): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if self.step_index is None: + self._init_step_index(timestep) + + sample = sample.to(torch.float32) + + sigma = self.sigmas_[self.step_index] + sigma_next = self.sigmas_[self.step_index + 1] + + prev_sample = sample + (sigma_next - sigma) * model_output + prev_sample = prev_sample.to(model_output.dtype) + + pred_original_sample = sample + (1.0 - sigma) * model_output + pred_original_sample = pred_original_sample.to(model_output.dtype) + + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return ConsistencyFlowMatchEulerDiscreteSchedulerOutput(prev_sample=prev_sample, + pred_original_sample=pred_original_sample) + + def __len__(self): + return self.config.num_train_timesteps diff --git a/hy3dgen/shapegen/utils.py b/hy3dgen/shapegen/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6ac8f5d42c47df0dc765e2cd28b352bed0a2dd8e --- /dev/null +++ b/hy3dgen/shapegen/utils.py @@ -0,0 +1,126 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the repsective licenses of these third-party components. 
+# Users must comply with all terms and conditions of original licenses of these third-party
+# components and must ensure that the usage of the third party components adheres to
+# all relevant laws and regulations.
+
+# For avoidance of doubts, Hunyuan 3D means the large language models and
+# their software and algorithms, including trained model weights, parameters (including
+# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
+# fine-tuning enabling code and other elements of the foregoing made publicly available
+# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
+
+import logging
+import os
+from functools import wraps
+
+import torch
+
+
+def get_logger(name):
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    return logger
+
+
+logger = get_logger('hy3dgen.shapegen')
+
+
+class synchronize_timer:
+    """ Synchronized timer to count the inference time of `nn.Module.forward`.
+
+    Supports both context manager and decorator usage.
+
+    Example as context manager:
+    ```python
+    with synchronize_timer('name') as t:
+        run()
+    ```
+
+    Example as decorator:
+    ```python
+    @synchronize_timer('Export to trimesh')
+    def export_to_trimesh(mesh_output):
+        pass
+    ```
+    """
+
+    def __init__(self, name=None):
+        self.name = name
+
+    def __enter__(self):
+        """Context manager entry: start timing."""
+        if os.environ.get('HY3DGEN_DEBUG', '0') == '1':
+            self.start = torch.cuda.Event(enable_timing=True)
+            self.end = torch.cuda.Event(enable_timing=True)
+            self.start.record()
+        return lambda: self.time
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        """Context manager exit: stop timing and log results."""
+        if os.environ.get('HY3DGEN_DEBUG', '0') == '1':
+            self.end.record()
+            torch.cuda.synchronize()
+            self.time = self.start.elapsed_time(self.end)
+            if self.name is not None:
+                logger.info(f'{self.name} takes {self.time} ms')
+
+    def __call__(self, func):
+        """Decorator: wrap the function to time its execution."""
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            with self:
+                result = func(*args, **kwargs)
+            return result
+
+        return wrapper
+
+
+def smart_load_model(
+    model_path,
+    subfolder,
+    use_safetensors,
+    variant,
+):
+    original_model_path = model_path
+    # try local path
+    base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
+    model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder))
+    logger.info(f'Trying to load model from local path: {model_path}')
+    if not os.path.exists(model_path):
+        logger.info('Model path does not exist, trying to download from Hugging Face')
+        try:
+            from huggingface_hub import snapshot_download
+            # only download the specified subfolder
+            path = snapshot_download(
+                repo_id=original_model_path,
+                allow_patterns=[f"{subfolder}/*"],  # key change: pattern-match the subfolder
+            )
+            model_path = os.path.join(path, subfolder)  # keep the path-join logic unchanged
+        except ImportError:
+            logger.warning(
+                "You need to install HuggingFace Hub to load models from the hub."
+ ) + raise RuntimeError(f"Model path {model_path} not found") + except Exception as e: + raise e + + if not os.path.exists(model_path): + raise FileNotFoundError(f"Model path {original_model_path} not found") + + extension = 'ckpt' if not use_safetensors else 'safetensors' + variant = '' if variant is None else f'.{variant}' + ckpt_name = f'model{variant}.{extension}' + config_path = os.path.join(model_path, 'config.yaml') + ckpt_path = os.path.join(model_path, ckpt_name) + return config_path, ckpt_path diff --git a/hy3dgen/texgen/__init__.py b/hy3dgen/texgen/__init__.py index 1f890f024d507021eca8087d40dc472de36152bd..7054c5797257839532eb03b05a474ddf020b1695 100644 --- a/hy3dgen/texgen/__init__.py +++ b/hy3dgen/texgen/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_glb.py b/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_glb.py deleted file mode 100644 index c5d7dc8c6127e62848dda8e79fdc281c5a7b42cb..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_glb.py +++ /dev/null @@ -1,248 +0,0 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - -# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT -# except for the third-party components listed below. -# Hunyuan 3D does not impose any additional limitations beyond what is outlined -# in the repsective licenses of these third-party components. -# Users must comply with all terms and conditions of original licenses of these third-party -# components and must ensure that the usage of the third party components adheres to -# all relevant laws and regulations. - -# For avoidance of doubts, Hunyuan 3D means the large language models and -# their software and algorithms, including trained model weights, parameters (including -# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, -# fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
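One note on `smart_load_model` above before the deleted texture-generation files: the checkpoint name is assembled from `use_safetensors` and `variant`. A sketch of the pure path arithmetic, with an illustrative cache location:

```python
import os

model_path = os.path.expanduser(
    '~/.cache/hy3dgen/tencent/Hunyuan3D-2/hunyuan3d-dit-v2-0')  # illustrative
use_safetensors, variant = True, 'fp16'

# Same naming logic as the tail of smart_load_model above.
extension = 'ckpt' if not use_safetensors else 'safetensors'
variant = '' if variant is None else f'.{variant}'
ckpt_name = f'model{variant}.{extension}'

print(os.path.join(model_path, 'config.yaml'))
print(os.path.join(model_path, ckpt_name))  # .../model.fp16.safetensors
```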
- -import base64 -import io -import os - -import numpy as np -from PIL import Image as PILImage -from pygltflib import GLTF2 -from scipy.spatial.transform import Rotation as R - - -# Function to extract buffer data -def get_buffer_data(gltf, buffer_view): - buffer = gltf.buffers[buffer_view.buffer] - buffer_data = gltf.get_data_from_buffer_uri(buffer.uri) - byte_offset = buffer_view.byteOffset if buffer_view.byteOffset else 0 - byte_length = buffer_view.byteLength - return buffer_data[byte_offset:byte_offset + byte_length] - - -# Function to extract attribute data -def get_attribute_data(gltf, accessor_index): - accessor = gltf.accessors[accessor_index] - buffer_view = gltf.bufferViews[accessor.bufferView] - buffer_data = get_buffer_data(gltf, buffer_view) - - comptype = {5120: np.int8, 5121: np.uint8, 5122: np.int16, 5123: np.uint16, 5125: np.uint32, 5126: np.float32} - dtype = comptype[accessor.componentType] - - t2n = {'SCALAR': 1, 'VEC2': 2, 'VEC3': 3, 'VEC4': 4, 'MAT2': 4, 'MAT3': 9, 'MAT4': 16} - num_components = t2n[accessor.type] - - # Calculate the correct slice of data - byte_offset = accessor.byteOffset if accessor.byteOffset else 0 - byte_stride = buffer_view.byteStride if buffer_view.byteStride else num_components * np.dtype(dtype).itemsize - count = accessor.count - - # Extract the attribute data - attribute_data = np.zeros((count, num_components), dtype=dtype) - for i in range(count): - start = byte_offset + i * byte_stride - end = start + num_components * np.dtype(dtype).itemsize - attribute_data[i] = np.frombuffer(buffer_data[start:end], dtype=dtype) - - return attribute_data - - -# Function to extract image data -def get_image_data(gltf, image, folder): - if image.uri: - if image.uri.startswith('data:'): - # Data URI - header, encoded = image.uri.split(',', 1) - data = base64.b64decode(encoded) - else: - # External file - fn = image.uri - if not os.path.isabs(fn): - fn = folder + '/' + fn - with open(fn, 'rb') as f: - data = f.read() - else: - buffer_view = gltf.bufferViews[image.bufferView] - data = get_buffer_data(gltf, buffer_view) - return data - - -# Function to convert triangle strip to triangles -def convert_triangle_strip_to_triangles(indices): - triangles = [] - for i in range(len(indices) - 2): - if i % 2 == 0: - triangles.append([indices[i], indices[i + 1], indices[i + 2]]) - else: - triangles.append([indices[i], indices[i + 2], indices[i + 1]]) - return np.array(triangles).reshape(-1, 3) - - -# Function to convert triangle fan to triangles -def convert_triangle_fan_to_triangles(indices): - triangles = [] - for i in range(1, len(indices) - 1): - triangles.append([indices[0], indices[i], indices[i + 1]]) - return np.array(triangles).reshape(-1, 3) - - -# Function to get the transformation matrix from a node -def get_node_transform(node): - if node.matrix: - return np.array(node.matrix).reshape(4, 4).T - else: - T = np.eye(4) - if node.translation: - T[:3, 3] = node.translation - if node.rotation: - R_mat = R.from_quat(node.rotation).as_matrix() - T[:3, :3] = R_mat - if node.scale: - S = np.diag(node.scale + [1]) - T = T @ S - return T - - -def get_world_transform(gltf, node_index, parents, world_transforms): - if parents[node_index] == -2: - return world_transforms[node_index] - - node = gltf.nodes[node_index] - if parents[node_index] == -1: - world_transforms[node_index] = get_node_transform(node) - parents[node_index] = -2 - return world_transforms[node_index] - - parent_index = parents[node_index] - parent_transform = get_world_transform(gltf, parent_index, 
parents, world_transforms) - world_transforms[node_index] = parent_transform @ get_node_transform(node) - parents[node_index] = -2 - return world_transforms[node_index] - - -def LoadGlb(path): - # Load the GLB file using pygltflib - gltf = GLTF2().load(path) - - primitives = [] - images = {} - # Iterate through the meshes in the GLB file - - world_transforms = [np.identity(4) for i in range(len(gltf.nodes))] - parents = [-1 for i in range(len(gltf.nodes))] - for node_index, node in enumerate(gltf.nodes): - for idx in node.children: - parents[idx] = node_index - # for i in range(len(gltf.nodes)): - # get_world_transform(gltf, i, parents, world_transform) - - for node_index, node in enumerate(gltf.nodes): - if node.mesh is not None: - world_transform = get_world_transform(gltf, node_index, parents, world_transforms) - # Iterate through the primitives in the mesh - mesh = gltf.meshes[node.mesh] - for primitive in mesh.primitives: - # Access the attributes of the primitive - attributes = primitive.attributes.__dict__ - mode = primitive.mode if primitive.mode is not None else 4 # Default to TRIANGLES - result = {} - if primitive.indices is not None: - indices = get_attribute_data(gltf, primitive.indices) - if mode == 4: # TRIANGLES - face_indices = indices.reshape(-1, 3) - elif mode == 5: # TRIANGLE_STRIP - face_indices = convert_triangle_strip_to_triangles(indices) - elif mode == 6: # TRIANGLE_FAN - face_indices = convert_triangle_fan_to_triangles(indices) - else: - continue - result['F'] = face_indices - - # Extract vertex positions - if 'POSITION' in attributes and attributes['POSITION'] is not None: - positions = get_attribute_data(gltf, attributes['POSITION']) - # Apply the world transformation to the positions - positions_homogeneous = np.hstack([positions, np.ones((positions.shape[0], 1))]) - transformed_positions = (world_transform @ positions_homogeneous.T).T[:, :3] - result['V'] = transformed_positions - - # Extract vertex colors - if 'COLOR_0' in attributes and attributes['COLOR_0'] is not None: - colors = get_attribute_data(gltf, attributes['COLOR_0']) - if colors.shape[-1] > 3: - colors = colors[..., :3] - result['VC'] = colors - - # Extract UVs - if 'TEXCOORD_0' in attributes and not attributes['TEXCOORD_0'] is None: - uvs = get_attribute_data(gltf, attributes['TEXCOORD_0']) - result['UV'] = uvs - - if primitive.material is not None: - material = gltf.materials[primitive.material] - if material.pbrMetallicRoughness is not None and material.pbrMetallicRoughness.baseColorTexture is not None: - texture_index = material.pbrMetallicRoughness.baseColorTexture.index - texture = gltf.textures[texture_index] - image_index = texture.source - if not image_index in images: - image = gltf.images[image_index] - image_data = get_image_data(gltf, image, os.path.dirname(path)) - pil_image = PILImage.open(io.BytesIO(image_data)) - if pil_image.mode != 'RGB': - pil_image = pil_image.convert('RGB') - images[image_index] = pil_image - result['TEX'] = image_index - elif material.emissiveTexture is not None: - texture_index = material.emissiveTexture.index - texture = gltf.textures[texture_index] - image_index = texture.source - if not image_index in images: - image = gltf.images[image_index] - image_data = get_image_data(gltf, image, os.path.dirname(path)) - pil_image = PILImage.open(io.BytesIO(image_data)) - if pil_image.mode != 'RGB': - pil_image = pil_image.convert('RGB') - images[image_index] = pil_image - result['TEX'] = image_index - else: - if material.pbrMetallicRoughness is not None: - 
base_color = material.pbrMetallicRoughness.baseColorFactor - else: - base_color = np.array([0.8, 0.8, 0.8], dtype=np.float32) - result['MC'] = base_color - - primitives.append(result) - - return primitives, images - - -def RotatePrimitives(primitives, transform): - for i in range(len(primitives)): - if 'V' in primitives[i]: - primitives[i]['V'] = primitives[i]['V'] @ transform.T - - -if __name__ == '__main__': - path = 'data/test.glb' - LoadGlb(path) diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_obj.py b/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_obj.py deleted file mode 100644 index a72c478d8efcb9a3d71a67ce5f167559ef76b922..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/io_obj.py +++ /dev/null @@ -1,76 +0,0 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - -# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT -# except for the third-party components listed below. -# Hunyuan 3D does not impose any additional limitations beyond what is outlined -# in the repsective licenses of these third-party components. -# Users must comply with all terms and conditions of original licenses of these third-party -# components and must ensure that the usage of the third party components adheres to -# all relevant laws and regulations. - -# For avoidance of doubts, Hunyuan 3D means the large language models and -# their software and algorithms, including trained model weights, parameters (including -# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, -# fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
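For reference while `io_glb.py` is removed: its strip-to-triangles helper (shown earlier in this deletion) alternates the winding of odd-indexed faces so all triangles keep a consistent orientation. Extracted as a standalone sketch:

```python
import numpy as np

def strip_to_triangles(indices):
    # Same logic as the deleted convert_triangle_strip_to_triangles.
    triangles = []
    for i in range(len(indices) - 2):
        if i % 2 == 0:
            triangles.append([indices[i], indices[i + 1], indices[i + 2]])
        else:
            triangles.append([indices[i], indices[i + 2], indices[i + 1]])
    return np.array(triangles).reshape(-1, 3)

print(strip_to_triangles([0, 1, 2, 3, 4]))
# [[0 1 2]
#  [1 3 2]
#  [2 3 4]]
```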
- -import cv2 -import numpy as np - - -def LoadObj(fn): - lines = [l.strip() for l in open(fn)] - vertices = [] - faces = [] - for l in lines: - words = [w for w in l.split(' ') if w != ''] - if len(words) == 0: - continue - if words[0] == 'v': - v = [float(words[i]) for i in range(1, 4)] - vertices.append(v) - elif words[0] == 'f': - f = [int(words[i]) - 1 for i in range(1, 4)] - faces.append(f) - - return np.array(vertices).astype('float32'), np.array(faces).astype('int32') - - -def LoadObjWithTexture(fn, tex_fn): - lines = [l.strip() for l in open(fn)] - vertices = [] - vertex_textures = [] - faces = [] - face_textures = [] - for l in lines: - words = [w for w in l.split(' ') if w != ''] - if len(words) == 0: - continue - if words[0] == 'v': - v = [float(words[i]) for i in range(1, len(words))] - vertices.append(v) - elif words[0] == 'vt': - v = [float(words[i]) for i in range(1, len(words))] - vertex_textures.append(v) - elif words[0] == 'f': - f = [] - ft = [] - for i in range(1, len(words)): - t = words[i].split('/') - f.append(int(t[0]) - 1) - ft.append(int(t[1]) - 1) - for i in range(2, len(f)): - faces.append([f[0], f[i - 1], f[i]]) - face_textures.append([ft[0], ft[i - 1], ft[i]]) - - tex_image = cv2.cvtColor(cv2.imread(tex_fn), cv2.COLOR_BGR2RGB) - return np.array(vertices).astype('float32'), np.array(vertex_textures).astype('float32'), np.array(faces).astype( - 'int32'), np.array(face_textures).astype('int32'), tex_image diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/render.py b/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/render.py deleted file mode 100644 index 743d4aac4da9e1e18374ce712ac24d19e6788870..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/render.py +++ /dev/null @@ -1,41 +0,0 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - -# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT -# except for the third-party components listed below. -# Hunyuan 3D does not impose any additional limitations beyond what is outlined -# in the repsective licenses of these third-party components. -# Users must comply with all terms and conditions of original licenses of these third-party -# components and must ensure that the usage of the third party components adheres to -# all relevant laws and regulations. - -# For avoidance of doubts, Hunyuan 3D means the large language models and -# their software and algorithms, including trained model weights, parameters (including -# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, -# fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
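Similarly for the removed `io_obj.py`: `LoadObjWithTexture` fan-triangulates polygonal faces around the first vertex. A two-line illustration of that triangulation on a quad (indices already shifted to 0-based by the `-1` in the parser):

```python
f = [0, 1, 2, 3]  # a quad face, e.g. "f 1/1 2/2 3/3 4/4" after the -1 shift
faces = [[f[0], f[i - 1], f[i]] for i in range(2, len(f))]
print(faces)  # [[0, 1, 2], [0, 2, 3]]
```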
- -import custom_rasterizer_kernel -import torch - - -def rasterize(pos, tri, resolution, clamp_depth=torch.zeros(0), use_depth_prior=0): - assert (pos.device == tri.device) - findices, barycentric = custom_rasterizer_kernel.rasterize_image(pos[0], tri, clamp_depth, resolution[1], - resolution[0], 1e-6, use_depth_prior) - return findices, barycentric - - -def interpolate(col, findices, barycentric, tri): - f = findices - 1 + (findices == 0) - vcol = col[0, tri.long()[f.long()]] - result = barycentric.view(*barycentric.shape, 1) * vcol - result = torch.sum(result, axis=-2) - return result.view(1, *result.shape) diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer_kernel.cpython-311-x86_64-linux-gnu.so b/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer_kernel.cpython-311-x86_64-linux-gnu.so deleted file mode 100644 index fae9d0b229821dfe744e1b7b70250848eaa60797..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer_kernel.cpython-311-x86_64-linux-gnu.so and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_deps b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_deps deleted file mode 100644 index 0227139e664b127ab09b323a310ef5b67e038309..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_deps and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_log b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_log deleted file mode 100644 index 961073a8816d177520ae1b8a655f413b83678c12..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/.ninja_log +++ /dev/null @@ -1,4 +0,0 @@ -# ninja log v5 -5 12944 1737469910283155280 /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o 6b1f5e5e4b199209 -4 13455 1737469910695486266 /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o af3659b839e5e6e4 -6 34765 1737469932096669642 /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o f5d05646c31ca370 diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/build.ninja b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/build.ninja deleted file mode 100644 index fb26eea1e35d1f43eba8e2b4be3527f6072dce16..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/build.ninja +++ /dev/null @@ -1,34 +0,0 @@ -ninja_required_version = 1.3 -cxx = c++ -nvcc = /usr/local/cuda/bin/nvcc - -cflags = -pthread -B /opt/conda/envs/hunyuan3d-2-open/compiler_compat -DNDEBUG -fwrapv -O2 -Wall -fPIC -O2 -isystem /opt/conda/envs/hunyuan3d-2-open/include -fPIC -O2 -isystem /opt/conda/envs/hunyuan3d-2-open/include -fPIC -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/torch/csrc/api/include 
-I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/TH -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/envs/hunyuan3d-2-open/include/python3.11 -c -post_cflags = -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=custom_rasterizer_kernel -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++17 -cuda_cflags = -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/TH -I/opt/conda/envs/hunyuan3d-2-open/lib/python3.11/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/envs/hunyuan3d-2-open/include/python3.11 -c -cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=custom_rasterizer_kernel -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_90,code=sm_90 -std=c++17 -cuda_dlink_post_cflags = -ldflags = - -rule compile - command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags - depfile = $out.d - deps = gcc - -rule cuda_compile - depfile = $out.d - deps = gcc - command = $nvcc --generate-dependencies-with-compile --dependency-output $out.d $cuda_cflags -c $in -o $out $cuda_post_cflags - - - - - -build /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o: compile /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp -build /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o: compile /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp -build /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o: cuda_compile /apdcephfs_cq5/share_300600172/huiwenshi/repos/Hunyuan3D-2-spaces/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu - - - - - - diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o deleted file mode 100644 index 372a5daca94d37bb722a058e89a13e2153bc6341..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/grid_neighbor.o and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o 
b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o deleted file mode 100644 index ec8fed027a0fe9a3339c8aeb51bfbeaf3b47f570..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer.o and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o b/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o deleted file mode 100644 index 963b6b8213309be5897c47fd976db2df8edafb3a..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/custom_rasterizer/build/temp.linux-x86_64-cpython-311/lib/custom_rasterizer_kernel/rasterizer_gpu.o and /dev/null differ diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/PKG-INFO b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/PKG-INFO deleted file mode 100644 index 4fd8d7197973d690207193769b1355f2aab0f91d..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/PKG-INFO +++ /dev/null @@ -1,3 +0,0 @@ -Metadata-Version: 2.1 -Name: custom_rasterizer -Version: 0.1 diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/SOURCES.txt b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/SOURCES.txt deleted file mode 100644 index ca40e02e41f7ba071df02ce368bfefec2847a6ad..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/SOURCES.txt +++ /dev/null @@ -1,12 +0,0 @@ -setup.py -./custom_rasterizer/__init__.py -./custom_rasterizer/io_glb.py -./custom_rasterizer/io_obj.py -./custom_rasterizer/render.py -custom_rasterizer.egg-info/PKG-INFO -custom_rasterizer.egg-info/SOURCES.txt -custom_rasterizer.egg-info/dependency_links.txt -custom_rasterizer.egg-info/top_level.txt -lib/custom_rasterizer_kernel/grid_neighbor.cpp -lib/custom_rasterizer_kernel/rasterizer.cpp -lib/custom_rasterizer_kernel/rasterizer_gpu.cu \ No newline at end of file diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/dependency_links.txt b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/top_level.txt b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/top_level.txt deleted file mode 100644 index 4880ad0e94189fc44fe2052edd5eaa0fcdbdb7e8..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer.egg-info/top_level.txt +++ /dev/null @@ -1,2 +0,0 @@ -custom_rasterizer -custom_rasterizer_kernel diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py index df40dcc8d4819eb903263ff1faf70ce902eb7e07..f471e1ac289d962613553fed2ba6e177e5af3ab9 100644 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py +++ b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in 
this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py index c5d7dc8c6127e62848dda8e79fdc281c5a7b42cb..606dd774b558857fb8a6773509fecd1f7da6e9f3 100644 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py +++ b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_glb.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py index a72c478d8efcb9a3d71a67ce5f167559ef76b922..e40d50050456a3a3d5cb3fbed516c4d4bd0bdb8f 100644 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py +++ b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/io_obj.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. 
# Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py index 743d4aac4da9e1e18374ce712ac24d19e6788870..2d4d3f7ee6ba13ff7df1000eb3dd3e978d2d6fc4 100644 --- a/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py +++ b/hy3dgen/texgen/custom_rasterizer/custom_rasterizer/render.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp b/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp deleted file mode 100644 index f02bcba5afd45a524143d06c972acb87c393fe97..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp +++ /dev/null @@ -1,574 +0,0 @@ -#include "rasterizer.h" -#include <unordered_set> - -inline int pos2key(float* p, int resolution) { - int x = (p[0] * 0.5 + 0.5) * resolution; - int y = (p[1] * 0.5 + 0.5) * resolution; - int z = (p[2] * 0.5 + 0.5) * resolution; - return (x * resolution + y) * resolution + z; -} - -inline void key2pos(int key, int resolution, float* p) { - int x = key / resolution / resolution; - int y = key / resolution % resolution; - int z = key % resolution; - p[0] = ((x + 0.5) / resolution - 0.5) * 2; - p[1] = ((y + 0.5) / resolution - 0.5) * 2; - p[2] = ((z + 0.5) / resolution - 0.5) * 2; -} - -inline void key2cornerpos(int key, int resolution, float* p) { - int x = key / resolution / resolution; - int y = key / resolution % resolution; - int z = key % resolution; - p[0] = ((x + 0.75) / resolution - 0.5) * 2; - p[1] = ((y + 0.25) / resolution - 0.5) * 2; - p[2] = ((z + 0.75) / resolution - 0.5) * 2; -} - -inline float* pos_ptr(int l, int i, int j, torch::Tensor t) { - float* pdata = t.data_ptr<float>(); - int height = t.size(1); - int width = t.size(2); - return &pdata[((l * height + i) * width + j) * 4]; -} - -struct Grid -{ - std::vector<int> seq2oddcorner; - std::vector<int> seq2evencorner; - std::vector<int> seq2grid; - std::vector<int> seq2normal; - std::vector<int> seq2neighbor; - std::unordered_map<int, int> grid2seq; - std::vector<int> downsample_seq; - int num_origin_seq; - int resolution; - int stride; -}; - -inline void pos_from_seq(Grid& grid, int seq, float* p) { - auto k = grid.seq2grid[seq]; - key2pos(k, grid.resolution, p); -} - -inline int fetch_seq(Grid& grid, int l, int i, int j, torch::Tensor pdata) { - float* p = pos_ptr(l, i, j, pdata); - if (p[3] == 0) - return -1; - auto key = pos2key(p, grid.resolution); - int seq = grid.grid2seq[key]; - return seq; -} - -inline int fetch_last_seq(Grid& grid, int i, int j, torch::Tensor pdata) { - int num_layers = pdata.size(0); - int l = 0; - int idx =
fetch_seq(grid, l, i, j, pdata); - while (l < num_layers - 1) { - l += 1; - int new_idx = fetch_seq(grid, l, i, j, pdata); - if (new_idx == -1) - break; - idx = new_idx; - } - return idx; -} - -inline int fetch_nearest_seq(Grid& grid, int i, int j, int dim, float d, torch::Tensor pdata) { - float p[3]; - float max_dist = 1e10; - int best_idx = -1; - int num_layers = pdata.size(0); - for (int l = 0; l < num_layers; ++l) { - int idx = fetch_seq(grid, l, i, j, pdata); - if (idx == -1) - break; - pos_from_seq(grid, idx, p); - float dist = std::abs(d - p[(dim + 2) % 3]); - if (dist < max_dist) { - max_dist = dist; - best_idx = idx; - } - } - return best_idx; -} - -inline int fetch_nearest_seq_layer(Grid& grid, int i, int j, int dim, float d, torch::Tensor pdata) { - float p[3]; - float max_dist = 1e10; - int best_layer = -1; - int num_layers = pdata.size(0); - for (int l = 0; l < num_layers; ++l) { - int idx = fetch_seq(grid, l, i, j, pdata); - if (idx == -1) - break; - pos_from_seq(grid, idx, p); - float dist = std::abs(d - p[(dim + 2) % 3]); - if (dist < max_dist) { - max_dist = dist; - best_layer = l; - } - } - return best_layer; -} - -void FetchNeighbor(Grid& grid, int seq, float* pos, int dim, int boundary_info, std::vector<torch::Tensor>& view_layer_positions, - int* output_indices) -{ - auto t = view_layer_positions[dim]; - int height = t.size(1); - int width = t.size(2); - int top = 0; - int ci = 0; - int cj = 0; - if (dim == 0) { - ci = (pos[1]/2+0.5)*height; - cj = (pos[0]/2+0.5)*width; - } - else if (dim == 1) { - ci = (pos[1]/2+0.5)*height; - cj = (pos[2]/2+0.5)*width; - } - else { - ci = (-pos[2]/2+0.5)*height; - cj = (pos[0]/2+0.5)*width; - } - int stride = grid.stride; - for (int ni = ci + stride; ni >= ci - stride; ni -= stride) { - for (int nj = cj - stride; nj <= cj + stride; nj += stride) { - int idx = -1; - if (ni == ci && nj == cj) - idx = seq; - else if (!(ni < 0 || ni >= height || nj < 0 || nj >= width)) { - if (boundary_info == -1) - idx = fetch_seq(grid, 0, ni, nj, t); - else if (boundary_info == 1) - idx = fetch_last_seq(grid, ni, nj, t); - else - idx = fetch_nearest_seq(grid, ni, nj, dim, pos[(dim + 2) % 3], t); - } - output_indices[top] = idx; - top += 1; - } - } -} - -void DownsampleGrid(Grid& src, Grid& tar) -{ - src.downsample_seq.resize(src.seq2grid.size(), -1); - tar.resolution = src.resolution / 2; - tar.stride = src.stride * 2; - float pos[3]; - std::vector<int> seq2normal_count; - for (int i = 0; i < src.seq2grid.size(); ++i) { - key2pos(src.seq2grid[i], src.resolution, pos); - int k = pos2key(pos, tar.resolution); - int s = seq2normal_count.size(); - if (!tar.grid2seq.count(k)) { - tar.grid2seq[k] = tar.seq2grid.size(); - tar.seq2grid.emplace_back(k); - seq2normal_count.emplace_back(0); - seq2normal_count.emplace_back(0); - seq2normal_count.emplace_back(0); - //tar.seq2normal.emplace_back(src.seq2normal[i]); - } else { - s = tar.grid2seq[k] * 3; - } - seq2normal_count[s + src.seq2normal[i]] += 1; - src.downsample_seq[i] = tar.grid2seq[k]; - } - tar.seq2normal.resize(seq2normal_count.size() / 3); - for (int i = 0; i < seq2normal_count.size(); i += 3) { - int t = 0; - for (int j = 1; j < 3; ++j) { - if (seq2normal_count[i + j] > seq2normal_count[i + t]) - t = j; - } - tar.seq2normal[i / 3] = t; - } -} - -void NeighborGrid(Grid& grid, std::vector<torch::Tensor> view_layer_positions, int v) -{ - grid.seq2evencorner.resize(grid.seq2grid.size(), 0); - grid.seq2oddcorner.resize(grid.seq2grid.size(), 0); - std::unordered_set<int> visited_seq; - for (int vd = 0; vd < 3; ++vd) { - auto t =
view_layer_positions[vd]; - auto t0 = view_layer_positions[v]; - int height = t.size(1); - int width = t.size(2); - int num_layers = t.size(0); - int num_view_layers = t0.size(0); - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; ++j) { - for (int l = 0; l < num_layers; ++l) { - int seq = fetch_seq(grid, l, i, j, t); - if (seq == -1) - break; - int dim = grid.seq2normal[seq]; - if (dim != v) - continue; - - float pos[3]; - pos_from_seq(grid, seq, pos); - - int ci = 0; - int cj = 0; - if (dim == 0) { - ci = (pos[1]/2+0.5)*height; - cj = (pos[0]/2+0.5)*width; - } - else if (dim == 1) { - ci = (pos[1]/2+0.5)*height; - cj = (pos[2]/2+0.5)*width; - } - else { - ci = (-pos[2]/2+0.5)*height; - cj = (pos[0]/2+0.5)*width; - } - - if ((ci % (grid.stride * 2) < grid.stride) && (cj % (grid.stride * 2) >= grid.stride)) - grid.seq2evencorner[seq] = 1; - - if ((ci % (grid.stride * 2) >= grid.stride) && (cj % (grid.stride * 2) < grid.stride)) - grid.seq2oddcorner[seq] = 1; - - bool is_boundary = false; - if (vd == v) { - if (l == 0 || l == num_layers - 1) - is_boundary = true; - else { - int seq_new = fetch_seq(grid, l + 1, i, j, t); - if (seq_new == -1) - is_boundary = true; - } - } - int boundary_info = 0; - if (is_boundary && (l == 0)) - boundary_info = -1; - else if (is_boundary) - boundary_info = 1; - if (visited_seq.count(seq)) - continue; - visited_seq.insert(seq); - - FetchNeighbor(grid, seq, pos, dim, boundary_info, view_layer_positions, &grid.seq2neighbor[seq * 9]); - } - } - } - } -} - -void PadGrid(Grid& src, Grid& tar, std::vector& view_layer_positions) { - auto& downsample_seq = src.downsample_seq; - auto& seq2evencorner = src.seq2evencorner; - auto& seq2oddcorner = src.seq2oddcorner; - int indices[9]; - std::vector mapped_even_corners(tar.seq2grid.size(), 0); - std::vector mapped_odd_corners(tar.seq2grid.size(), 0); - for (int i = 0; i < downsample_seq.size(); ++i) { - if (seq2evencorner[i] > 0) { - mapped_even_corners[downsample_seq[i]] = 1; - } - if (seq2oddcorner[i] > 0) { - mapped_odd_corners[downsample_seq[i]] = 1; - } - } - auto& tar_seq2normal = tar.seq2normal; - auto& tar_seq2grid = tar.seq2grid; - for (int i = 0; i < tar_seq2grid.size(); ++i) { - if (mapped_even_corners[i] == 1 && mapped_odd_corners[i] == 1) - continue; - auto k = tar_seq2grid[i]; - float p[3]; - key2cornerpos(k, tar.resolution, p); - - int src_key = pos2key(p, src.resolution); - if (!src.grid2seq.count(src_key)) { - int seq = src.seq2grid.size(); - src.grid2seq[src_key] = seq; - src.seq2evencorner.emplace_back((mapped_even_corners[i] == 0)); - src.seq2oddcorner.emplace_back((mapped_odd_corners[i] == 0)); - src.seq2grid.emplace_back(src_key); - src.seq2normal.emplace_back(tar_seq2normal[i]); - FetchNeighbor(src, seq, p, tar_seq2normal[i], 0, view_layer_positions, indices); - for (int j = 0; j < 9; ++j) { - src.seq2neighbor.emplace_back(indices[j]); - } - src.downsample_seq.emplace_back(i); - } else { - int seq = src.grid2seq[src_key]; - if (mapped_even_corners[i] == 0) - src.seq2evencorner[seq] = 1; - if (mapped_odd_corners[i] == 0) - src.seq2oddcorner[seq] = 1; - } - } -} - -std::vector> build_hierarchy(std::vector view_layer_positions, - std::vector view_layer_normals, int num_level, int resolution) -{ - if (view_layer_positions.size() != 3 || num_level < 1) { - printf("Alert! We require 3 layers and at least 1 level! 
(%d %d)\n", view_layer_positions.size(), num_level); - return {{},{},{},{}}; - } - - std::vector grids; - grids.resize(num_level); - - std::vector seq2pos; - auto& seq2grid = grids[0].seq2grid; - auto& seq2normal = grids[0].seq2normal; - auto& grid2seq = grids[0].grid2seq; - grids[0].resolution = resolution; - grids[0].stride = 1; - - auto int64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false); - auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false); - - for (int v = 0; v < 3; ++v) { - int num_layers = view_layer_positions[v].size(0); - int height = view_layer_positions[v].size(1); - int width = view_layer_positions[v].size(2); - float* data = view_layer_positions[v].data_ptr(); - float* data_normal = view_layer_normals[v].data_ptr(); - for (int l = 0; l < num_layers; ++l) { - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; ++j) { - float* p = &data[(i * width + j) * 4]; - float* n = &data_normal[(i * width + j) * 3]; - if (p[3] == 0) - continue; - auto k = pos2key(p, resolution); - if (!grid2seq.count(k)) { - int dim = 0; - for (int d = 0; d < 3; ++d) { - if (std::abs(n[d]) > std::abs(n[dim])) - dim = d; - } - dim = (dim + 1) % 3; - grid2seq[k] = seq2grid.size(); - seq2grid.emplace_back(k); - seq2pos.push_back(p[0]); - seq2pos.push_back(p[1]); - seq2pos.push_back(p[2]); - seq2normal.emplace_back(dim); - } - } - } - data += (height * width * 4); - data_normal += (height * width * 3); - } - } - - for (int i = 0; i < num_level - 1; ++i) { - DownsampleGrid(grids[i], grids[i + 1]); - } - - for (int l = 0; l < num_level; ++l) { - grids[l].seq2neighbor.resize(grids[l].seq2grid.size() * 9, -1); - grids[l].num_origin_seq = grids[l].seq2grid.size(); - for (int d = 0; d < 3; ++d) { - NeighborGrid(grids[l], view_layer_positions, d); - } - } - - for (int i = num_level - 2; i >= 0; --i) { - PadGrid(grids[i], grids[i + 1], view_layer_positions); - } - for (int i = grids[0].num_origin_seq; i < grids[0].seq2grid.size(); ++i) { - int k = grids[0].seq2grid[i]; - float p[3]; - key2pos(k, grids[0].resolution, p); - seq2pos.push_back(p[0]); - seq2pos.push_back(p[1]); - seq2pos.push_back(p[2]); - } - - std::vector texture_positions(2); - std::vector grid_neighbors(grids.size()); - std::vector grid_downsamples(grids.size() - 1); - std::vector grid_evencorners(grids.size()); - std::vector grid_oddcorners(grids.size()); - - texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options); - texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options); - float* positions_out_ptr = texture_positions[0].data_ptr(); - memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size()); - positions_out_ptr = texture_positions[1].data_ptr(); - for (int i = 0; i < grids[0].seq2grid.size(); ++i) { - positions_out_ptr[i] = (i < grids[0].num_origin_seq); - } - - for (int i = 0; i < grids.size(); ++i) { - grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options); - long* nptr = grid_neighbors[i].data_ptr(); - for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) { - nptr[j] = grids[i].seq2neighbor[j]; - } - - grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options); - grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options); - long* dptr = grid_evencorners[i].data_ptr(); - for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) { - dptr[j] = grids[i].seq2evencorner[j]; - } - dptr = grid_oddcorners[i].data_ptr(); - for (int j = 0; j < 
grids[i].seq2oddcorner.size(); ++j) { - dptr[j] = grids[i].seq2oddcorner[j]; - } - if (i + 1 < grids.size()) { - grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options); - long* dptr = grid_downsamples[i].data_ptr(); - for (int j = 0; j < grids[i].downsample_seq.size(); ++j) { - dptr[j] = grids[i].downsample_seq[j]; - } - } - - } - return {texture_positions, grid_neighbors, grid_downsamples, grid_evencorners, grid_oddcorners}; -} - -std::vector> build_hierarchy_with_feat( - std::vector view_layer_positions, - std::vector view_layer_normals, - std::vector view_layer_feats, - int num_level, int resolution) -{ - if (view_layer_positions.size() != 3 || num_level < 1) { - printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level); - return {{},{},{},{}}; - } - - std::vector grids; - grids.resize(num_level); - - std::vector seq2pos; - std::vector seq2feat; - auto& seq2grid = grids[0].seq2grid; - auto& seq2normal = grids[0].seq2normal; - auto& grid2seq = grids[0].grid2seq; - grids[0].resolution = resolution; - grids[0].stride = 1; - - auto int64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false); - auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false); - - int feat_channel = 3; - for (int v = 0; v < 3; ++v) { - int num_layers = view_layer_positions[v].size(0); - int height = view_layer_positions[v].size(1); - int width = view_layer_positions[v].size(2); - float* data = view_layer_positions[v].data_ptr(); - float* data_normal = view_layer_normals[v].data_ptr(); - float* data_feat = view_layer_feats[v].data_ptr(); - feat_channel = view_layer_feats[v].size(3); - for (int l = 0; l < num_layers; ++l) { - for (int i = 0; i < height; ++i) { - for (int j = 0; j < width; ++j) { - float* p = &data[(i * width + j) * 4]; - float* n = &data_normal[(i * width + j) * 3]; - float* f = &data_feat[(i * width + j) * feat_channel]; - if (p[3] == 0) - continue; - auto k = pos2key(p, resolution); - if (!grid2seq.count(k)) { - int dim = 0; - for (int d = 0; d < 3; ++d) { - if (std::abs(n[d]) > std::abs(n[dim])) - dim = d; - } - dim = (dim + 1) % 3; - grid2seq[k] = seq2grid.size(); - seq2grid.emplace_back(k); - seq2pos.push_back(p[0]); - seq2pos.push_back(p[1]); - seq2pos.push_back(p[2]); - for (int c = 0; c < feat_channel; ++c) { - seq2feat.emplace_back(f[c]); - } - seq2normal.emplace_back(dim); - } - } - } - data += (height * width * 4); - data_normal += (height * width * 3); - data_feat += (height * width * feat_channel); - } - } - - for (int i = 0; i < num_level - 1; ++i) { - DownsampleGrid(grids[i], grids[i + 1]); - } - - for (int l = 0; l < num_level; ++l) { - grids[l].seq2neighbor.resize(grids[l].seq2grid.size() * 9, -1); - grids[l].num_origin_seq = grids[l].seq2grid.size(); - for (int d = 0; d < 3; ++d) { - NeighborGrid(grids[l], view_layer_positions, d); - } - } - - for (int i = num_level - 2; i >= 0; --i) { - PadGrid(grids[i], grids[i + 1], view_layer_positions); - } - for (int i = grids[0].num_origin_seq; i < grids[0].seq2grid.size(); ++i) { - int k = grids[0].seq2grid[i]; - float p[3]; - key2pos(k, grids[0].resolution, p); - seq2pos.push_back(p[0]); - seq2pos.push_back(p[1]); - seq2pos.push_back(p[2]); - for (int c = 0; c < feat_channel; ++c) { - seq2feat.emplace_back(0.5); - } - } - - std::vector texture_positions(2); - std::vector texture_feats(1); - std::vector grid_neighbors(grids.size()); - std::vector grid_downsamples(grids.size() - 1); - std::vector 
grid_evencorners(grids.size()); - std::vector grid_oddcorners(grids.size()); - - texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options); - texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options); - texture_feats[0] = torch::zeros({seq2feat.size() / feat_channel, feat_channel}, float_options); - float* positions_out_ptr = texture_positions[0].data_ptr(); - memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size()); - positions_out_ptr = texture_positions[1].data_ptr(); - for (int i = 0; i < grids[0].seq2grid.size(); ++i) { - positions_out_ptr[i] = (i < grids[0].num_origin_seq); - } - float* feats_out_ptr = texture_feats[0].data_ptr(); - memcpy(feats_out_ptr, seq2feat.data(), sizeof(float) * seq2feat.size()); - - for (int i = 0; i < grids.size(); ++i) { - grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options); - long* nptr = grid_neighbors[i].data_ptr(); - for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) { - nptr[j] = grids[i].seq2neighbor[j]; - } - grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options); - grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options); - long* dptr = grid_evencorners[i].data_ptr(); - for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) { - dptr[j] = grids[i].seq2evencorner[j]; - } - dptr = grid_oddcorners[i].data_ptr(); - for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) { - dptr[j] = grids[i].seq2oddcorner[j]; - } - if (i + 1 < grids.size()) { - grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options); - long* dptr = grid_downsamples[i].data_ptr(); - for (int j = 0; j < grids[i].downsample_seq.size(); ++j) { - dptr[j] = grids[i].downsample_seq[j]; - } - } - } - return {texture_positions, texture_feats, grid_neighbors, grid_downsamples, grid_evencorners, grid_oddcorners}; -} diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp b/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp deleted file mode 100644 index b3ff69f5abe309be2784303d384524774708c2a3..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp +++ /dev/null @@ -1,139 +0,0 @@ -#include "rasterizer.h" - -void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) { - float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0])); - float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0])); - float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1])); - float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1])); - - for (int px = x_min; px < x_max + 1; ++px) { - if (px < 0 || px >= width) - continue; - for (int py = y_min; py < y_max + 1; ++py) { - if (py < 0 || py >= height) - continue; - float vt[2] = {px + 0.5, py + 0.5}; - float baryCentricCoordinate[3]; - calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate); - if (isBarycentricCoordInBounds(baryCentricCoordinate)) { - int pixel = py * width + px; - if (zbuffer == 0) { - zbuffer[pixel] = (INT64)(idx + 1); - continue; - } - - float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2]; - float depth_thres = 0; - if (d) { - depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation; - } - - int z_quantize = depth * (2<<17); - INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1); - if (depth < 
depth_thres) - continue; - zbuffer[pixel] = std::min(zbuffer[pixel], token); - } - } - } -} - -void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces, - float* barycentric_map, int pix) -{ - INT64 f = zbuffer[pix] % MAXINT; - if (f == (MAXINT-1)) { - findices[pix] = 0; - barycentric_map[pix * 3] = 0; - barycentric_map[pix * 3 + 1] = 0; - barycentric_map[pix * 3 + 2] = 0; - return; - } - findices[pix] = f; - f -= 1; - float barycentric[3] = {0, 0, 0}; - if (f >= 0) { - float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f}; - float* vt0_ptr = V + (F[f * 3] * 4); - float* vt1_ptr = V + (F[f * 3 + 1] * 4); - float* vt2_ptr = V + (F[f * 3 + 2] * 4); - - float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f}; - float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f}; - float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f}; - - calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric); - - barycentric[0] = barycentric[0] / vt0_ptr[3]; - barycentric[1] = barycentric[1] / vt1_ptr[3]; - barycentric[2] = barycentric[2] / vt2_ptr[3]; - float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]); - barycentric[0] *= w; - barycentric[1] *= w; - barycentric[2] *= w; - - } - barycentric_map[pix * 3] = barycentric[0]; - barycentric_map[pix * 3 + 1] = barycentric[1]; - barycentric_map[pix * 3 + 2] = barycentric[2]; -} - -void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f) -{ - float* vt0_ptr = V + (F[f * 3] * 4); - float* vt1_ptr = V + (F[f * 3 + 1] * 4); - float* vt2_ptr = V + (F[f * 3 + 2] * 4); - - float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f}; - float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f}; - float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f}; - - rasterizeTriangleCPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc); -} - -std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, - int width, int height, float occlusion_truncation, int use_depth_prior) -{ - int num_faces = F.size(0); - int num_vertices = V.size(0); - auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false); - auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false); - auto findices = torch::zeros({height, width}, options); - INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1); - auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint; - - if (!use_depth_prior) { - for (int i = 0; i < num_faces; ++i) { - rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), 0, - (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i); - } - } else { - for (int i = 0; i < num_faces; ++i) - rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(), - (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i); - } - - auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false); - auto barycentric = torch::zeros({height, width, 3}, float_options); - for (int i = 0; i < width * height; ++i) - barycentricFromImgcoordCPU(V.data_ptr<float>(), F.data_ptr<int>(), - findices.data_ptr<int>(), (INT64*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i); - - return {findices, barycentric}; -} - -std::vector<torch::Tensor> rasterize_image(torch::Tensor V, torch::Tensor F, torch::Tensor D, - int width, int height, float occlusion_truncation, int use_depth_prior) -{ - int device_id = V.get_device(); - if (device_id == -1) - return rasterize_image_cpu(V, F, D, width, height, occlusion_truncation, use_depth_prior); - else - return rasterize_image_gpu(V, F, D, width, height, occlusion_truncation, use_depth_prior); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("rasterize_image", &rasterize_image, "Custom image rasterization"); - m.def("build_hierarchy", &build_hierarchy, "Custom image rasterization"); - m.def("build_hierarchy_with_feat", &build_hierarchy_with_feat, "Custom image rasterization"); -} diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h b/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h deleted file mode 100644 index cf4f9870bda0714763e4236f85293ca7cef7d51f..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef RASTERIZER_H_ -#define RASTERIZER_H_ - -#include <torch/extension.h> -#include <cuda_runtime.h> -#include <vector> -#include <ATen/cuda/CUDAContext.h> // For CUDA context - -#define INT64 unsigned long long -#define MAXINT 2147483647 - -__host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) { - return ((c[0] - a[0]) * (b[1] - a[1]) - (b[0] - a[0]) * (c[1] - a[1])); -} - -__host__ __device__ inline void calculateBarycentricCoordinate(float* a, float* b, float* c, float* p, - float* barycentric) -{ - float beta_tri = calculateSignedArea2(a, p, c); - float gamma_tri = calculateSignedArea2(a, b, p); - float area = calculateSignedArea2(a, b, c); - if (area == 0) { - barycentric[0] = -1.0; - barycentric[1] = -1.0; - barycentric[2] = -1.0; - return; - } - float tri_inv = 1.0 / area; - float beta = beta_tri * tri_inv; - float gamma = gamma_tri * tri_inv; - float alpha = 1.0 - beta - gamma; - barycentric[0] = alpha; - barycentric[1] = beta; - barycentric[2] = gamma; -} - -__host__ __device__ inline bool isBarycentricCoordInBounds(float* barycentricCoord) { - return barycentricCoord[0] >= 0.0 && barycentricCoord[0] <= 1.0 && - barycentricCoord[1] >= 0.0 && barycentricCoord[1] <= 1.0 && - barycentricCoord[2] >= 0.0 && barycentricCoord[2] <= 1.0; -} - -std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, - int width, int height, float occlusion_truncation, int use_depth_prior); - -std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tensor> view_layer_positions, std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution); - -std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat( - std::vector<torch::Tensor> view_layer_positions, - std::vector<torch::Tensor> view_layer_normals, - std::vector<torch::Tensor> view_layer_feats, - int num_level, int resolution); - -#endif \ No newline at end of file diff --git a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu b/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu
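A note on the z-buffer scheme shared by the CPU implementation above and the CUDA implementation that follows: each pixel holds a single 64-bit token packing the quantized depth (high part) and the 1-based face index (low part), so one `std::min`/`atomicMin` both depth-tests and records the winning triangle. A minimal Python sketch of that arithmetic, reusing the `MAXINT` and `2<<17` constants from rasterizer.h (the helper names are invented for illustration):

```python
# Sketch only: mirrors the token packing in rasterizeTriangleCPU/GPU.
MAXINT = 2147483647  # same constant as rasterizer.h

def pack_token(depth: float, face_idx: int) -> int:
    z_quantize = int(depth * (2 << 17))           # depth in [0, 1] -> 18-bit integer
    return z_quantize * MAXINT + (face_idx + 1)   # face ids stored 1-based; 0 means "empty"

def unpack_token(token: int):
    return token // MAXINT, token % MAXINT        # (quantized depth, face id + 1)

near, far = pack_token(0.25, face_idx=7), pack_token(0.75, face_idx=3)
assert min(near, far) == near                     # min keeps the closest face per pixel
assert unpack_token(near) == (65536, 8)
```

Because depth occupies the high bits, ordinary integer ordering on tokens is ordering by depth first, which is what lets the GPU path get away with a single `atomicMin` per covered pixel.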
deleted file mode 100644 index 709c1b86a5ee92e3c2ff6ce9df85f1492c3c5378..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu +++ /dev/null @@ -1,127 +0,0 @@ -#include "rasterizer.h" - -__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) { - float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0])); - float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0])); - float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1])); - float y_max = std::max(vt0[1], std::max(vt1[1],vt2[1])); - - for (int px = x_min; px < x_max + 1; ++px) { - if (px < 0 || px >= width) - continue; - for (int py = y_min; py < y_max + 1; ++py) { - if (py < 0 || py >= height) - continue; - float vt[2] = {px + 0.5f, py + 0.5f}; - float baryCentricCoordinate[3]; - calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate); - if (isBarycentricCoordInBounds(baryCentricCoordinate)) { - int pixel = py * width + px; - if (zbuffer == 0) { - atomicExch(&zbuffer[pixel], (INT64)(idx + 1)); - continue; - } - float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2]; - float depth_thres = 0; - if (d) { - depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation; - } - - int z_quantize = depth * (2<<17); - INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1); - if (depth < depth_thres) - continue; - atomicMin(&zbuffer[pixel], token); - } - } - } -} - -__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces, - float* barycentric_map) -{ - int pix = blockIdx.x * blockDim.x + threadIdx.x; - if (pix >= width * height) - return; - INT64 f = zbuffer[pix] % MAXINT; - if (f == (MAXINT-1)) { - findices[pix] = 0; - barycentric_map[pix * 3] = 0; - barycentric_map[pix * 3 + 1] = 0; - barycentric_map[pix * 3 + 2] = 0; - return; - } - findices[pix] = f; - f -= 1; - float barycentric[3] = {0, 0, 0}; - if (f >= 0) { - float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f}; - float* vt0_ptr = V + (F[f * 3] * 4); - float* vt1_ptr = V + (F[f * 3 + 1] * 4); - float* vt2_ptr = V + (F[f * 3 + 2] * 4); - - float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f}; - float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f}; - float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f}; - - calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric); - - barycentric[0] = barycentric[0] / vt0_ptr[3]; - barycentric[1] = barycentric[1] / vt1_ptr[3]; - barycentric[2] = barycentric[2] / vt2_ptr[3]; - float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]); - barycentric[0] *= w; - barycentric[1] *= w; - barycentric[2] *= w; - - } - barycentric_map[pix * 3] = barycentric[0]; - barycentric_map[pix * 3 + 1] = barycentric[1]; - barycentric_map[pix * 3 + 2] = barycentric[2]; -} - -__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces) -{ - int f = blockIdx.x * blockDim.x + threadIdx.x; - if (f >= num_faces) - return; - - float* vt0_ptr = V + 
(F[f * 3] * 4); - float* vt1_ptr = V + (F[f * 3 + 1] * 4); - float* vt2_ptr = V + (F[f * 3 + 2] * 4); - - float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f}; - float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f}; - float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f}; - - rasterizeTriangleGPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc); -} - -std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D, - int width, int height, float occlusion_truncation, int use_depth_prior) -{ - int device_id = V.get_device(); - cudaSetDevice(device_id); - int num_faces = F.size(0); - int num_vertices = V.size(0); - auto options = torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false); - auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false); - auto findices = torch::zeros({height, width}, options); - INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1); - auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint; - - if (!use_depth_prior) { - rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), 0, - (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces); - } else { - rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(), - (INT64*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces); - } - - auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false); - auto barycentric = torch::zeros({height, width, 3}, float_options); - barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr<float>(), F.data_ptr<int>(), - findices.data_ptr<int>(), (INT64*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>()); - - return {findices, barycentric}; -} diff --git a/hy3dgen/texgen/differentiable_renderer/__init__.py b/hy3dgen/texgen/differentiable_renderer/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644 --- a/hy3dgen/texgen/differentiable_renderer/__init__.py +++ b/hy3dgen/texgen/differentiable_renderer/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -20,4 +10,4 @@ # their software and algorithms, including trained model weights, parameters (including # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, # fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. \ No newline at end of file diff --git a/hy3dgen/texgen/differentiable_renderer/camera_utils.py b/hy3dgen/texgen/differentiable_renderer/camera_utils.py index 289710ab787a174b39154f1010fc6209e4c92dfe..b67727c828662e34d14b44c9fbff9f101815fbc1 100644 --- a/hy3dgen/texgen/differentiable_renderer/camera_utils.py +++ b/hy3dgen/texgen/differentiable_renderer/camera_utils.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.bat b/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.bat new file mode 100644 index 0000000000000000000000000000000000000000..3947b0f03f9f6245dac95db7460703076444a304 --- /dev/null +++ b/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.bat @@ -0,0 +1,3 @@ +FOR /F "tokens=*" %%i IN ('python -m pybind11 --includes') DO SET PYINCLUDES=%%i +echo %PYINCLUDES% +g++ -O3 -Wall -shared -std=c++11 -fPIC %PYINCLUDES% mesh_processor.cpp -o mesh_processor.pyd -lpython3.12 \ No newline at end of file diff --git a/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.sh b/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.sh deleted file mode 100644 index 056be9dc28d7947419f06536799b64f65e4ff827..0000000000000000000000000000000000000000 --- a/hy3dgen/texgen/differentiable_renderer/compile_mesh_painter.sh +++ /dev/null @@ -1 +0,0 @@ -c++ -O3 -Wall -shared -std=c++11 -fPIC `python3 -m pybind11 --includes` mesh_processor.cpp -o mesh_processor`python3-config --extension-suffix` \ No newline at end of file diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp b/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp index ed6ac6e62637b97bb97373fff30e90e4e2587fd8..ca8650fada02099d3fce0f551fa4f953f278cf34 100644 --- a/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp +++ b/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpp @@ -1,10 +1,10 @@ -#include -#include -#include #include #include #include #include +#include +#include +#include namespace py = pybind11; using namespace std; diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpython-311-x86_64-linux-gnu.so b/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpython-311-x86_64-linux-gnu.so deleted file mode 
100644 index 42890fece062ce38cfd31c7fb8beb7138fcdb56e..0000000000000000000000000000000000000000 Binary files a/hy3dgen/texgen/differentiable_renderer/mesh_processor.cpython-311-x86_64-linux-gnu.so and /dev/null differ diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_processor.py b/hy3dgen/texgen/differentiable_renderer/mesh_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..5a731ccea4237c4605f94c7b52ace42d590b6aa0 --- /dev/null +++ b/hy3dgen/texgen/differentiable_renderer/mesh_processor.py @@ -0,0 +1,84 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations. + +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +import numpy as np + +def meshVerticeInpaint_smooth(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx): + texture_height, texture_width, texture_channel = texture.shape + vtx_num = vtx_pos.shape[0] + + vtx_mask = np.zeros(vtx_num, dtype=np.float32) + vtx_color = [np.zeros(texture_channel, dtype=np.float32) for _ in range(vtx_num)] + uncolored_vtxs = [] + G = [[] for _ in range(vtx_num)] + + for i in range(uv_idx.shape[0]): + for k in range(3): + vtx_uv_idx = uv_idx[i, k] + vtx_idx = pos_idx[i, k] + uv_v = int(round(vtx_uv[vtx_uv_idx, 0] * (texture_width - 1))) + uv_u = int(round((1.0 - vtx_uv[vtx_uv_idx, 1]) * (texture_height - 1))) + if mask[uv_u, uv_v] > 0: + vtx_mask[vtx_idx] = 1.0 + vtx_color[vtx_idx] = texture[uv_u, uv_v] + else: + uncolored_vtxs.append(vtx_idx) + G[pos_idx[i, k]].append(pos_idx[i, (k + 1) % 3]) + + smooth_count = 2 + last_uncolored_vtx_count = 0 + while smooth_count > 0: + uncolored_vtx_count = 0 + for vtx_idx in uncolored_vtxs: + sum_color = np.zeros(texture_channel, dtype=np.float32) + total_weight = 0.0 + vtx_0 = vtx_pos[vtx_idx] + for connected_idx in G[vtx_idx]: + if vtx_mask[connected_idx] > 0: + vtx1 = vtx_pos[connected_idx] + dist = np.sqrt(np.sum((vtx_0 - vtx1) ** 2)) + dist_weight = 1.0 / max(dist, 1e-4) + dist_weight *= dist_weight + sum_color += vtx_color[connected_idx] * dist_weight + total_weight += dist_weight + if total_weight > 0: + vtx_color[vtx_idx] = sum_color / total_weight + vtx_mask[vtx_idx] = 1.0 + else: + uncolored_vtx_count += 1 + + if last_uncolored_vtx_count == uncolored_vtx_count: + smooth_count -= 1 + else: + smooth_count += 1 + last_uncolored_vtx_count = uncolored_vtx_count + + new_texture = texture.copy() + new_mask = mask.copy() + for face_idx in range(uv_idx.shape[0]): + for k in range(3): + vtx_uv_idx = uv_idx[face_idx, k] + vtx_idx = pos_idx[face_idx, k] + if vtx_mask[vtx_idx] == 1.0: + uv_v = int(round(vtx_uv[vtx_uv_idx, 0] * (texture_width - 1))) + uv_u = int(round((1.0 - vtx_uv[vtx_uv_idx, 1]) * (texture_height - 1))) + new_texture[uv_u, uv_v] = vtx_color[vtx_idx] +
new_mask[uv_u, uv_v] = 255 + return new_texture, new_mask + +def meshVerticeInpaint(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx, method="smooth"): + if method == "smooth": + return meshVerticeInpaint_smooth(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx) + else: + raise ValueError("Invalid method. Only 'smooth' is supported.") \ No newline at end of file diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_render.py b/hy3dgen/texgen/differentiable_renderer/mesh_render.py index c85b80e043221282e9ff6bfb81764fb32c5d48ed..30049efbdcce375fa13fbae26d6b9da67e21c3cb 100644 --- a/hy3dgen/texgen/differentiable_renderer/mesh_render.py +++ b/hy3dgen/texgen/differentiable_renderer/mesh_render.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/differentiable_renderer/mesh_utils.py b/hy3dgen/texgen/differentiable_renderer/mesh_utils.py index ca0ba1a6145c68651ec033b97e80900cd2c9d7ec..fa5694a66a501262b3d50acdf1340d0fa8487dec 100644 --- a/hy3dgen/texgen/differentiable_renderer/mesh_utils.py +++ b/hy3dgen/texgen/differentiable_renderer/mesh_utils.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined diff --git a/hy3dgen/texgen/differentiable_renderer/setup.py b/hy3dgen/texgen/differentiable_renderer/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..1bfdb10a559dc63f56502f45efdef0470dd41bc5 --- /dev/null +++ b/hy3dgen/texgen/differentiable_renderer/setup.py @@ -0,0 +1,62 @@ +# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT +# except for the third-party components listed below. +# Hunyuan 3D does not impose any additional limitations beyond what is outlined +# in the respective licenses of these third-party components. +# Users must comply with all terms and conditions of original licenses of these third-party +# components and must ensure that the usage of the third party components adheres to +# all relevant laws and regulations.
+ +# For avoidance of doubts, Hunyuan 3D means the large language models and +# their software and algorithms, including trained model weights, parameters (including +# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, +# fine-tuning enabling code and other elements of the foregoing made publicly available +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. + +from setuptools import setup, Extension +import pybind11 +import sys +import platform + +def get_platform_specific_args(): + system = platform.system().lower() + cpp_std = 'c++14' # Make configurable if needed + + if sys.platform == 'win32': + compile_args = ['/O2', f'/std:{cpp_std}', '/EHsc', '/MP', '/DWIN32_LEAN_AND_MEAN', '/bigobj'] + link_args = [] + extra_includes = [] + elif system == 'linux': + compile_args = ['-O3', f'-std={cpp_std}', '-fPIC', '-Wall', '-Wextra', '-pthread'] + link_args = ['-fPIC', '-pthread'] + extra_includes = [] + elif sys.platform == 'darwin': + compile_args = ['-O3', f'-std={cpp_std}', '-fPIC', '-Wall', '-Wextra', + '-stdlib=libc++', '-mmacosx-version-min=10.14'] + link_args = ['-fPIC', '-stdlib=libc++', '-mmacosx-version-min=10.14', '-dynamiclib'] + extra_includes = [] + else: + raise RuntimeError(f"Unsupported platform: {system}") + + return compile_args, link_args, extra_includes + +extra_compile_args, extra_link_args, platform_includes = get_platform_specific_args() +include_dirs = [pybind11.get_include(), pybind11.get_include(user=True)] +include_dirs.extend(platform_includes) + +ext_modules = [ + Extension( + "mesh_processor", + ["mesh_processor.cpp"], + include_dirs=include_dirs, + language='c++', + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + ), +] + +setup( + name="mesh_processor", + ext_modules=ext_modules, + install_requires=['pybind11>=2.6.0'], + python_requires='>=3.6', +) \ No newline at end of file diff --git a/hy3dgen/texgen/hunyuanpaint/__init__.py b/hy3dgen/texgen/hunyuanpaint/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644 --- a/hy3dgen/texgen/hunyuanpaint/__init__.py +++ b/hy3dgen/texgen/hunyuanpaint/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -20,4 +10,4 @@ # their software and algorithms, including trained model weights, parameters (including # optimizer states), machine-learning model code, inference-enabling code, training-enabling code, # fine-tuning enabling code and other elements of the foregoing made publicly available -# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. +# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. 
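For context on the two differentiable_renderer additions above: mesh_processor.py is a pure-Python fallback for the C++ mesh_processor extension, and the new setup.py builds the native version (`python setup.py build_ext --inplace`). Both expose the same `meshVerticeInpaint` entry point, which propagates colors from masked texels to uncolored vertices with inverse-distance-squared weighting. A hedged usage sketch; the one-triangle mesh and seed texel below are invented for illustration:

```python
# Sketch only: exercises the pure-Python fallback added in this patch.
import numpy as np
from hy3dgen.texgen.differentiable_renderer.mesh_processor import meshVerticeInpaint

texture = np.zeros((8, 8, 3), dtype=np.float32)
mask = np.zeros((8, 8), dtype=np.uint8)
texture[0, 0] = (1.0, 0.0, 0.0)   # one known texel seeds the propagation
mask[0, 0] = 255

vtx_pos = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0]], dtype=np.float32)
vtx_uv = np.array([[0.0, 1.0], [1.0, 1.0], [0.0, 0.0]], dtype=np.float32)
pos_idx = np.array([[0, 1, 2]], dtype=np.int64)
uv_idx = np.array([[0, 1, 2]], dtype=np.int64)

# Colors flow along mesh edges from colored to uncolored vertices (1/d^2 weights),
# then the recovered vertex colors are splatted back into the texture and mask.
new_texture, new_mask = meshVerticeInpaint(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx)
```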
\ No newline at end of file diff --git a/hy3dgen/texgen/hunyuanpaint/pipeline.py b/hy3dgen/texgen/hunyuanpaint/pipeline.py index 436ce34efb8bc40c3df2b3902b7a29dffa39ae91..7a0c8a19604f969b20e0ffc27f6f33820a10e99a 100644 --- a/hy3dgen/texgen/hunyuanpaint/pipeline.py +++ b/hy3dgen/texgen/hunyuanpaint/pipeline.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. - # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT # except for the third-party components listed below. # Hunyuan 3D does not impose any additional limitations beyond what is outlined @@ -119,6 +109,8 @@ class HunyuanPaintPipeline(StableDiffusionPipeline): return_dict=True, **cached_condition, ): + device = self._execution_device + if image is None: raise ValueError("Inputting embeddings not supported for this pipeline. Please pass an image.") assert not isinstance(image, torch.Tensor) @@ -127,7 +119,7 @@ class HunyuanPaintPipeline(StableDiffusionPipeline): image_vae = torch.tensor(np.array(image) / 255.0) image_vae = image_vae.unsqueeze(0).permute(0, 3, 1, 2).unsqueeze(0) - image_vae = image_vae.to(device=self.vae.device, dtype=self.vae.dtype) + image_vae = image_vae.to(device=device, dtype=self.vae.dtype) batch_size = image_vae.shape[0] assert batch_size == 1 @@ -171,13 +163,13 @@ class HunyuanPaintPipeline(StableDiffusionPipeline): camera_info = cached_condition['camera_info_gen'] # B,N if isinstance(camera_info, List): camera_info = torch.tensor(camera_info) - camera_info = camera_info.to(image_vae.device).to(torch.int64) + camera_info = camera_info.to(device).to(torch.int64) cached_condition['camera_info_gen'] = camera_info if 'camera_info_ref' in cached_condition: camera_info = cached_condition['camera_info_ref'] # B,N if isinstance(camera_info, List): camera_info = torch.tensor(camera_info) - camera_info = camera_info.to(image_vae.device).to(torch.int64) + camera_info = camera_info.to(device).to(torch.int64) cached_condition['camera_info_ref'] = camera_info cached_condition['ref_latents'] = ref_latents diff --git a/hy3dgen/texgen/hunyuanpaint/unet/__init__.py b/hy3dgen/texgen/hunyuanpaint/unet/__init__.py index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644 --- a/hy3dgen/texgen/hunyuanpaint/unet/__init__.py +++ b/hy3dgen/texgen/hunyuanpaint/unet/__init__.py @@ -1,13 +1,3 @@ -# Open Source Model Licensed under the Apache License Version 2.0 -# and Other Licenses of the Third-Party Components therein: -# The below Model in this distribution may have been modified by THL A29 Limited -# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. - -# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. -# The below software and/or models in this distribution may have been -# modified by THL A29 Limited ("Tencent Modifications"). -# All Tencent Modifications are Copyright (C) THL A29 Limited. 
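The pipeline.py hunks above replace `self.vae.device` with `self._execution_device` throughout. The distinction matters once model CPU offload is enabled (the hook is added to Hunyuan3DPaintPipeline later in this diff): offloaded submodules report device `cpu` between calls, while `_execution_device` is where diffusers actually stages inputs. A sketch of the behavior, using a stock diffusers pipeline and an illustrative model id rather than anything from this patch:

```python
# Sketch only: why _execution_device is the safer target under offload.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()

print(pipe.vae.device)         # cpu  - weights can sit on CPU until the VAE runs
print(pipe._execution_device)  # cuda:0 - where inputs should be moved
```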
diff --git a/hy3dgen/texgen/hunyuanpaint/unet/__init__.py b/hy3dgen/texgen/hunyuanpaint/unet/__init__.py
index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644
--- a/hy3dgen/texgen/hunyuanpaint/unet/__init__.py
+++ b/hy3dgen/texgen/hunyuanpaint/unet/__init__.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -20,4 +10,4 @@
 # their software and algorithms, including trained model weights, parameters (including
 # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
 # fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
+# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
\ No newline at end of file
diff --git a/hy3dgen/texgen/hunyuanpaint/unet/modules.py b/hy3dgen/texgen/hunyuanpaint/unet/modules.py
index 5d16bc6b6bb1ebc72c602dcb298d122429fe847d..e2ee269a74fa3941ffaacf8401b4c1a5935b74a7 100644
--- a/hy3dgen/texgen/hunyuanpaint/unet/modules.py
+++ b/hy3dgen/texgen/hunyuanpaint/unet/modules.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -22,7 +12,6 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-
 import copy
 import json
 import os
diff --git a/hy3dgen/texgen/pipelines.py b/hy3dgen/texgen/pipelines.py
index 7aaa0390e01a21513089b5d5bb6bd35eafb2cecd..7f2eb9d69ff9b1f8ecba0c2b7c963d8f2ac442ba 100644
--- a/hy3dgen/texgen/pipelines.py
+++ b/hy3dgen/texgen/pipelines.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -24,15 +14,16 @@
 
 import logging
-import os
-
 import numpy as np
+import os
 import torch
 from PIL import Image
+from typing import Union, Optional
 
 from .differentiable_renderer.mesh_render import MeshRender
 from .utils.dehighlight_utils import Light_Shadow_Remover
 from .utils.multiview_utils import Multiview_Diffusion_Net
+from .utils.imagesuper_utils import Image_Super_Net
 from .utils.uv_warp_utils import mesh_uv_wrap
 
 logger = logging.getLogger(__name__)
 
@@ -50,7 +41,7 @@ class Hunyuan3DTexGenConfig:
         self.candidate_view_weights = [1, 0.1, 0.5, 0.1, 0.05, 0.05]
 
         self.render_size = 2048
-        self.texture_size = 1024
+        self.texture_size = 2048
         self.bake_exp = 4
         self.merge_method = 'fast'
 
@@ -77,7 +68,6 @@ class Hunyuan3DPaintPipeline:
                                                allow_patterns=["hunyuan3d-paint-v2-0/*"])
             delight_model_path = os.path.join(model_path, 'hunyuan3d-delight-v2-0')
             multiview_model_path = os.path.join(model_path, 'hunyuan3d-paint-v2-0')
-
             return cls(Hunyuan3DTexGenConfig(delight_model_path, multiview_model_path))
         except ImportError:
             logger.warning(
@@ -104,6 +94,11 @@ class Hunyuan3DPaintPipeline:
         # Load model
         self.models['delight_model'] = Light_Shadow_Remover(self.config)
         self.models['multiview_model'] = Multiview_Diffusion_Net(self.config)
+        # self.models['super_model'] = Image_Super_Net(self.config)
+
+    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+        self.models['delight_model'].pipeline.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
+        self.models['multiview_model'].pipeline.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
 
     def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True):
         normal_maps = []
@@ -150,6 +145,40 @@ class Hunyuan3DPaintPipeline:
 
         return texture
 
+    def recenter_image(self, image, border_ratio=0.2):
+        if image.mode == 'RGB':
+            return image
+        elif image.mode == 'L':
+            image = image.convert('RGB')
+            return image
+
+        alpha_channel = np.array(image)[:, :, 3]
+        non_zero_indices = np.argwhere(alpha_channel > 0)
+        if non_zero_indices.size == 0:
+            raise ValueError("Image is fully transparent")
+
+        min_row, min_col = non_zero_indices.min(axis=0)
+        max_row, max_col = non_zero_indices.max(axis=0)
+
+        cropped_image = image.crop((min_col, min_row, max_col + 1, max_row + 1))
+
+        width, height = cropped_image.size
+        border_width = int(width * border_ratio)
+        border_height = int(height * border_ratio)
+
+        new_width = width + 2 * border_width
+        new_height = height + 2 * border_height
+
+        square_size = max(new_width, new_height)
+
+        new_image = Image.new('RGBA', (square_size, square_size), (255, 255, 255, 0))
+
+        paste_x = (square_size - new_width) // 2 + border_width
+        paste_y = (square_size - new_height) // 2 + border_height
+
+        new_image.paste(cropped_image, (paste_x, paste_y))
+        return new_image
+
     @torch.no_grad()
     def __call__(self, mesh, image):
 
@@ -158,6 +187,8 @@ class Hunyuan3DPaintPipeline:
         else:
             image_prompt = image
 
+        image_prompt = self.recenter_image(image_prompt)
+
         image_prompt = self.models['delight_model'](image_prompt)
 
         mesh = mesh_uv_wrap(mesh)
@@ -178,6 +209,7 @@ class Hunyuan3DPaintPipeline:
         multiviews = self.models['multiview_model'](image_prompt, normal_maps + position_maps, camera_info)
 
         for i in range(len(multiviews)):
+            # multiviews[i] = self.models['super_model'](multiviews[i])
             multiviews[i] = multiviews[i].resize(
                 (self.config.render_size, self.config.render_size))
 
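A hedged usage sketch of the two additions to `Hunyuan3DPaintPipeline`. The repo ID and input path are illustrative; `recenter_image` crops an RGBA input to its alpha bounding box, pads it by `border_ratio`, and returns a square canvas, while `enable_model_cpu_offload` simply forwards to both wrapped diffusers pipelines:

```python
from PIL import Image

# Repo ID assumed from the surrounding project; from_pretrained is defined above.
paint = Hunyuan3DPaintPipeline.from_pretrained('tencent/Hunyuan3D-2')
paint.enable_model_cpu_offload()  # run the texture stage in less VRAM

img = Image.open('assets/example_images/004.png')        # RGBA example input
centred = paint.recenter_image(img, border_ratio=0.2)    # no-op for RGB/L inputs
print(centred.size[0] == centred.size[1])                # True: square canvas
```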
diff --git a/hy3dgen/texgen/utils/__init__.py b/hy3dgen/texgen/utils/__init__.py
index e307c3f8c1292da02f308e4b59ef0bcd6fe7305e..8bb2bf86e283e50f0df2ecfba8fc66289df09901 100644
--- a/hy3dgen/texgen/utils/__init__.py
+++ b/hy3dgen/texgen/utils/__init__.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -20,4 +10,4 @@
 # their software and algorithms, including trained model weights, parameters (including
 # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
 # fine-tuning enabling code and other elements of the foregoing made publicly available
-# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
+# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
\ No newline at end of file
diff --git a/hy3dgen/texgen/utils/alignImg4Tex_utils.py b/hy3dgen/texgen/utils/alignImg4Tex_utils.py
index 0a09c17cfe1a3f1ac850688e96b66341f0226418..34df2041e598b2cbf92a345f4d003c53437dfb92 100644
--- a/hy3dgen/texgen/utils/alignImg4Tex_utils.py
+++ b/hy3dgen/texgen/utils/alignImg4Tex_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -22,7 +12,6 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-
 import torch
 from diffusers import EulerAncestralDiscreteScheduler
 from diffusers import StableDiffusionControlNetPipeline, StableDiffusionXLControlNetImg2ImgPipeline, ControlNetModel, \
diff --git a/hy3dgen/texgen/utils/counter_utils.py b/hy3dgen/texgen/utils/counter_utils.py
index e0374fc327ad2127ec84bb0c267c19a3b9c8d738..383a5151cb22e1c965a8432cc5a0ac312eaa9ab2 100644
--- a/hy3dgen/texgen/utils/counter_utils.py
+++ b/hy3dgen/texgen/utils/counter_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
diff --git a/hy3dgen/texgen/utils/dehighlight_utils.py b/hy3dgen/texgen/utils/dehighlight_utils.py
index d9724aef4f0f8057473ea26e1fe248592b616608..6e6105f4537d3c9be98b1b1a33820178c262503e 100644
--- a/hy3dgen/texgen/utils/dehighlight_utils.py
+++ b/hy3dgen/texgen/utils/dehighlight_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -44,6 +34,33 @@ class Light_Shadow_Remover():
         pipeline.set_progress_bar_config(disable=True)
 
         self.pipeline = pipeline.to(self.device, torch.float16)
+
+    def recorrect_rgb(self, src_image, target_image, alpha_channel, scale=0.95):
+
+        def flat_and_mask(bgr, a):
+            mask = torch.where(a > 0.5, True, False)
+            bgr_flat = bgr.reshape(-1, bgr.shape[-1])
+            mask_flat = mask.reshape(-1)
+            bgr_flat_masked = bgr_flat[mask_flat, :]
+            return bgr_flat_masked
+
+        src_flat = flat_and_mask(src_image, alpha_channel)
+        target_flat = flat_and_mask(target_image, alpha_channel)
+        corrected_bgr = torch.zeros_like(src_image)
+
+        for i in range(3):
+            src_mean, src_stddev = torch.mean(src_flat[:, i]), torch.std(src_flat[:, i])
+            target_mean, target_stddev = torch.mean(target_flat[:, i]), torch.std(target_flat[:, i])
+            corrected_bgr[:, :, i] = torch.clamp((src_image[:, :, i] - scale * src_mean) * (target_stddev / src_stddev) + scale * target_mean, 0, 1)
+
+        src_mse = torch.mean((src_image - target_image) ** 2)
+        modify_mse = torch.mean((corrected_bgr - target_image) ** 2)
+        if src_mse < modify_mse:
+            corrected_bgr = torch.cat([src_image, alpha_channel], dim=-1)
+        else:
+            corrected_bgr = torch.cat([corrected_bgr, alpha_channel], dim=-1)
+
+        return corrected_bgr
 
     @torch.no_grad()
     def __call__(self, image):
@@ -81,4 +98,10 @@ class Light_Shadow_Remover():
             guidance_scale=self.cfg_text,
         ).images[0]
 
+        image_tensor = torch.tensor(np.array(image)/255.0).to(self.device)
+        rgb_src = image_tensor[:,:,:3]
+        image = self.recorrect_rgb(rgb_src, rgb_target, alpha)
+        image = image[:,:,:3]*image[:,:,3:] + torch.ones_like(image[:,:,:3])*(1.0-image[:,:,3:])
+        image = Image.fromarray((image.cpu().numpy()*255).astype(np.uint8))
+
         return image
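The core of `recorrect_rgb` is per-channel statistic matching: each source channel is shifted and rescaled so its mean and standard deviation track the target's (with `scale=1.0` this is classic Reinhard-style color transfer; the method above softens it with `scale=0.95` and keeps whichever result has lower MSE). A self-contained toy check on synthetic tensors:

```python
import torch

src = torch.rand(64, 64)              # one colour channel in [0, 1]
tgt = torch.rand(64, 64) * 0.4 + 0.3  # target with different statistics

# Same transform recorrect_rgb applies per channel, here with scale = 1.0.
out = torch.clamp((src - src.mean()) * (tgt.std() / src.std()) + tgt.mean(), 0, 1)

print(round(out.mean().item(), 3), round(tgt.mean().item(), 3))  # ~equal
print(round(out.std().item(), 3), round(tgt.std().item(), 3))    # ~equal
```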
diff --git a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/__init__.py b/hy3dgen/texgen/utils/imagesuper_utils.py
similarity index 53%
rename from hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/__init__.py
rename to hy3dgen/texgen/utils/imagesuper_utils.py
index df40dcc8d4819eb903263ff1faf70ce902eb7e07..0b893c53a4571b4d7eb5adf7b199e4152c0a227b 100644
--- a/hy3dgen/texgen/custom_rasterizer/build/lib.linux-x86_64-cpython-311/custom_rasterizer/__init__.py
+++ b/hy3dgen/texgen/utils/imagesuper_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -22,11 +12,23 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-'''
-from .hierarchy import BuildHierarchy, BuildHierarchyWithColor
-from .io_obj import LoadObj, LoadObjWithTexture
-from .render import rasterize, interpolate
-'''
-from .io_glb import *
-from .io_obj import *
-from .render import *
+import torch
+from diffusers import StableDiffusionUpscalePipeline
+
+class Image_Super_Net():
+    def __init__(self, config):
+        self.up_pipeline_x4 = StableDiffusionUpscalePipeline.from_pretrained(
+            'stabilityai/stable-diffusion-x4-upscaler',
+            torch_dtype=torch.float16,
+        ).to(config.device)
+        self.up_pipeline_x4.set_progress_bar_config(disable=True)
+
+    def __call__(self, image, prompt=''):
+        with torch.no_grad():
+            upscaled_image = self.up_pipeline_x4(
+                prompt=[prompt],
+                image=image,
+                num_inference_steps=5,
+            ).images[0]
+
+        return upscaled_image
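A standalone sketch of this optional super-resolution step (it is wired in but commented out in the paint pipeline above). The model ID is the one the class uses; the input file name is illustrative, and 5 inference steps trades quality for speed:

```python
import torch
from diffusers import StableDiffusionUpscalePipeline
from PIL import Image

pipe = StableDiffusionUpscalePipeline.from_pretrained(
    'stabilityai/stable-diffusion-x4-upscaler', torch_dtype=torch.float16,
).to('cuda')
pipe.set_progress_bar_config(disable=True)

low_res = Image.open('view.png').convert('RGB').resize((128, 128))
up = pipe(prompt=[''], image=low_res, num_inference_steps=5).images[0]
print(up.size)  # 4x in each dimension: (512, 512)
```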
diff --git a/hy3dgen/texgen/utils/multiview_utils.py b/hy3dgen/texgen/utils/multiview_utils.py
index de10b6b9dbe8667be1905c7f4039b6cc28d39bcd..121c04947d1bd18d8eb48c01e218d6637509450d 100644
--- a/hy3dgen/texgen/utils/multiview_utils.py
+++ b/hy3dgen/texgen/utils/multiview_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
diff --git a/hy3dgen/texgen/utils/simplify_mesh_utils.py b/hy3dgen/texgen/utils/simplify_mesh_utils.py
index 915284d337e648c57fae886dee3333c0203856b6..5c239993dd1498feb1d95e32f6b0df4a3e88da1e 100644
--- a/hy3dgen/texgen/utils/simplify_mesh_utils.py
+++ b/hy3dgen/texgen/utils/simplify_mesh_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
diff --git a/hy3dgen/texgen/utils/uv_warp_utils.py b/hy3dgen/texgen/utils/uv_warp_utils.py
index b14bc7e58a089e2dc1c74242c527a539a05d8478..f55a924f16aa6b2dce3347f39ff7e8ef190065f4 100644
--- a/hy3dgen/texgen/utils/uv_warp_utils.py
+++ b/hy3dgen/texgen/utils/uv_warp_utils.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -30,8 +20,8 @@ def mesh_uv_wrap(mesh):
     if isinstance(mesh, trimesh.Scene):
         mesh = mesh.dump(concatenate=True)
 
-    if len(mesh.faces) > 50000:
-        raise ValueError("The mesh has more than 50,000 faces, which is not supported.")
+    if len(mesh.faces) > 500000000:
+        raise ValueError("The mesh has more than 500,000,000 faces, which is not supported.")
 
     vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
 
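A hedged usage sketch for the unwrap helper above. The mesh file name is illustrative, and the assumption that the helper returns the remapped mesh follows the call site in pipelines.py (`mesh = mesh_uv_wrap(mesh)`); with the face limit raised to 500,000,000, the cap is effectively gone for practical inputs:

```python
import trimesh
from hy3dgen.texgen.utils.uv_warp_utils import mesh_uv_wrap

mesh = trimesh.load('demo.glb', force='mesh')  # Scenes are flattened inside
mesh = mesh_uv_wrap(mesh)                      # xatlas computes the UV atlas
print(mesh.visual.uv.shape)                    # assumed: (n_vertices, 2) UVs
```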
diff --git a/hy3dgen/text2image.py b/hy3dgen/text2image.py
index bea52937bea4fffa306f34031f594e26c1ff27a4..2c8a3ab373750ab8ab12a497f15a03179f49ea07 100644
--- a/hy3dgen/text2image.py
+++ b/hy3dgen/text2image.py
@@ -1,13 +1,3 @@
-# Open Source Model Licensed under the Apache License Version 2.0
-# and Other Licenses of the Third-Party Components therein:
-# The below Model in this distribution may have been modified by THL A29 Limited
-# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
-# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
-# The below software and/or models in this distribution may have been
-# modified by THL A29 Limited ("Tencent Modifications").
-# All Tencent Modifications are Copyright (C) THL A29 Limited.
-
 # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
 # except for the third-party components listed below.
 # Hunyuan 3D does not impose any additional limitations beyond what is outlined
@@ -22,7 +12,6 @@
 # fine-tuning enabling code and other elements of the foregoing made publicly available
 # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
 
-
 import os
 import random
 
@@ -80,9 +69,9 @@ class HunyuanDiTPipeline:
         generator = torch.Generator(device=self.pipe.device)
         generator = generator.manual_seed(int(seed))
         out_img = self.pipe(
-            prompt=self.pos_txt+prompt,
+            prompt=prompt[:60] + self.pos_txt,
            negative_prompt=self.neg_txt,
-            num_inference_steps=20,
+            num_inference_steps=25,
             pag_scale=1.3,
             width=1024,
             height=1024,
diff --git a/requirements.txt b/requirements.txt
index 70b0d154fde03b11b4d54117f8c8fb6c45385b21..abdab84e043a82dc588767e52e925971dd183d4f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,36 +1,40 @@
-gradio_litmodel3d
 ninja
 pybind11
-trimesh
+
 diffusers
-tqdm
 einops
 opencv-python
 numpy
 torch
 transformers
 torchvision
-torchaudio
-ConfigArgParse
-xatlas
-scikit-learn
-scikit-image
-tritonclient
-gevent
-geventhttpclient
-facexlib
-accelerate
-ipdb
+#taming-transformers-rom1504
+#ConfigArgParse
+#ipdb
 omegaconf
+
+#sentencepiece
+tqdm
+
+# Mesh Processing
+trimesh
 pymeshlab
-pytorch_lightning
-taming-transformers-rom1504
-kornia
-rembg
-onnxruntime
 pygltflib
-sentencepiece
+xatlas
+#kornia
+#facexlib
+
+# Training
+accelerate
+#pytorch_lightning
+#scikit-learn
+#scikit-image
+
+# Demo only
 gradio
+fastapi
 uvicorn
-fastapi==0.112.2
-wheel
+rembg
+onnxruntime
+#gevent
+#geventhttpclient
\ No newline at end of file
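A hedged sketch of the retuned text-to-image call; the constructor signature and model path are assumptions, not shown in this diff. The point of the change is ordering: the user prompt is clipped to 60 characters *before* the positive-quality suffix (`self.pos_txt`) is appended, so a long prompt can no longer push the suffix past the text encoder's token limit:

```python
from hy3dgen.text2image import HunyuanDiTPipeline

# Model path assumed; adjust to whatever checkpoint the class actually loads.
t2i = HunyuanDiTPipeline("Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled")
image = t2i("a lovely rabbit eating carrots")  # 1024x1024 PIL image
image.save("t2i_demo.png")
```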