Spaces:

hyz317
/

StdGEN

Running on L40S

App Files Files Community

YulianSa committed on Mar 16

Commit

216a665

1 Parent(s): f1e6905

update

Browse files

Files changed (4) hide show

app.py +14 -14
infer_api.py +68 -73
refine/mesh_refine.py +168 -14
slrm/models/lrm_mesh.py +2 -2

app.py CHANGED Viewed

@@ -10,20 +10,20 @@ import os
 import shlex
 import subprocess
-os.makedirs("./ckpt", exist_ok=True)
-# download ViT-H SAM model into ./ckpt
-subprocess.call(["wget", "-q", "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth", "-O", "./ckpt/sam_vit_h_4b8939.pth"])
-subprocess.run(
-    shlex.split(
-        "pip install pip==24.0"
-    )
-)
-subprocess.run(
-    shlex.split(
-        "pip install package/nvdiffrast-0.3.1.torch-cp310-cp310-linux_x86_64.whl --force-reinstall --no-deps"
-    )
-)
 from infer_api import InferAPI

 import shlex
 import subprocess
+# os.makedirs("./ckpt", exist_ok=True)
+# # download ViT-H SAM model into ./ckpt
+# subprocess.call(["wget", "-q", "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth", "-O", "./ckpt/sam_vit_h_4b8939.pth"])
+# subprocess.run(
+#     shlex.split(
+#         "pip install pip==24.0"
+#     )
+# )
+# subprocess.run(
+#     shlex.split(
+#         "pip install package/nvdiffrast-0.3.1.torch-cp310-cp310-linux_x86_64.whl --force-reinstall --no-deps"
+#     )
+# )
 from infer_api import InferAPI

infer_api.py CHANGED Viewed

@@ -12,6 +12,7 @@ from omegaconf import OmegaConf
 import numpy as np
 import torch
 from diffusers import AutoencoderKL, DDIMScheduler
 from diffusers.utils import check_min_version
@@ -72,7 +73,7 @@ from slrm.utils.camera_util import (
     FOV_to_intrinsics,
     get_circular_camera_poses,
 )
-from slrm.utils.mesh_util import save_obj, save_glb
 from slrm.utils.infer_util import images_to_video
 import cv2
@@ -477,7 +478,7 @@ def calc_horizontal_offset2(target_mask, source_img):
 @spaces.GPU
-def get_distract_mask(generator, color_0, color_1, normal_0=None, normal_1=None, thres=0.25, ratio=0.50, outside_thres=0.10, outside_ratio=0.20):
     distract_area = np.abs(color_0 - color_1).sum(axis=-1) > thres
     if normal_0 is not None and normal_1 is not None:
         distract_area |= np.abs(normal_0 - normal_1).sum(axis=-1) > thres
@@ -503,43 +504,7 @@ def get_distract_mask(generator, color_0, color_1, normal_0=None, normal_1=None,
         max_x, max_y = bbox.max(axis=0)
         distract_bbox[min_x:max_x, min_y:max_y] = 1
-    points = np.array(random_sampled_points)[:, ::-1]
-    labels = np.ones(len(points), dtype=np.int32)
-    masks = generator.generate((color_1 * 255).astype(np.uint8))
-    outside_area = np.abs(color_0 - color_1).sum(axis=-1) < outside_thres
-    final_mask = np.zeros_like(distract_mask)
-    for iii, mask in enumerate(masks):
-        mask['segmentation'] = cv2.resize(mask['segmentation'].astype(np.float32), (1024, 1024)) > 0.5
-        intersection = np.logical_and(mask['segmentation'], distract_mask).sum()
-        total = mask['segmentation'].sum()
-        iou = intersection / total
-        outside_intersection = np.logical_and(mask['segmentation'], outside_area).sum()
-        outside_total = mask['segmentation'].sum()
-        outside_iou = outside_intersection / outside_total
-        if iou > ratio and outside_iou < outside_ratio:
-            final_mask |= mask['segmentation']
-    # calculate coverage
-    intersection = np.logical_and(final_mask, distract_mask).sum()
-    total = distract_mask.sum()
-    coverage = intersection / total
-    if coverage < 0.8:
-        # use original distract mask
-        final_mask = (distract_mask.copy() * 255).astype(np.uint8)
-        final_mask = cv2.dilate(final_mask, np.ones((3, 3), np.uint8), iterations=3)
-        labeled_array_dilate, num_features_dilate = scipy.ndimage.label(final_mask)
-        for i in range(num_features_dilate + 1):
-            if np.sum(labeled_array_dilate == i) < 200:
-                final_mask[labeled_array_dilate == i] = 255
-        final_mask = cv2.erode(final_mask, np.ones((3, 3), np.uint8), iterations=3)
-        final_mask = final_mask > 127
-    return distract_mask, distract_bbox, random_sampled_points, final_mask
 # infer_refine_sam = sam_model_registry["vit_h"](checkpoint="./ckpt/sam_vit_h_4b8939.pth").cuda()
@@ -563,6 +528,7 @@ def infer_refine(meshes, imgs):
     distract_mask = None
     results = []
     for name_idx, level in zip([2, 0, 1], [2, 1, 0]):
         mesh = trimesh.load(meshes[name_idx])
@@ -607,11 +573,11 @@ def infer_refine(meshes, imgs):
             colors.append(color)
             normals.append(normal)
-        # if last_front_color is not None and level == 0:
-        #     original_mask, distract_bbox, _, distract_mask = get_distract_mask(infer_refine_generator, last_front_color, np.array(colors[0]).astype(np.float32) / 255.0, outside_ratio=infer_refine_outside_ratio)
-        # else:
-        distract_mask = None
-        distract_bbox = None
         if last_colors is None:
             from copy import deepcopy
@@ -625,15 +591,15 @@ def infer_refine(meshes, imgs):
             _, idx_anchor = kdtree_anchor.query(mesh_v, k=1)
             _, idx_mesh_v = kdtree_mesh_v.query(mesh_v, k=25)
             idx_anchor = idx_anchor.squeeze()
-            neighbors = torch.tensor(mesh_v)[idx_mesh_v]  # V, 25, 3
             # calculate the distances neighbors [V, 25, 3]; mesh_v [V, 3] -> [V, 25]
-            neighbor_dists = torch.norm(neighbors - torch.tensor(mesh_v)[:, None], dim=-1)
             neighbor_dists[neighbor_dists > 0.06] = 114514.
             neighbor_weights = torch.exp(-neighbor_dists * 1.)
             neighbor_weights = neighbor_weights / neighbor_weights.sum(dim=1, keepdim=True)
             anchors = fixed_v[idx_anchor]  # V, 3
             anchor_normals = calc_vertex_normals(fixed_v, fixed_f)[idx_anchor]  # V, 3
-            dis_anchor = torch.clamp(((anchors - torch.tensor(mesh_v)) * anchor_normals).sum(-1), min=0) + 0.01
             vec_anchor = dis_anchor[:, None] * anchor_normals  # V, 3
             vec_anchor = vec_anchor[idx_mesh_v]  # V, 25, 3
             weighted_vec_anchor = (vec_anchor * neighbor_weights[:, :, None]).sum(1)  # V, 3
@@ -647,7 +613,7 @@ def infer_refine(meshes, imgs):
         # my mesh flow weight by nearest vertexs
         try:
             if fixed_v is not None and fixed_f is not None and level != 0:
-                new_mesh_v = new_mesh.verts_packed().cpu().numpy()
                 fixed_v_cpu = fixed_v.cpu().numpy()
                 kdtree_anchor = KDTree(fixed_v_cpu)
@@ -655,48 +621,60 @@ def infer_refine(meshes, imgs):
                 _, idx_anchor = kdtree_anchor.query(new_mesh_v, k=1)
                 _, idx_mesh_v = kdtree_mesh_v.query(new_mesh_v, k=25)
                 idx_anchor = idx_anchor.squeeze()
-                neighbors = torch.tensor(new_mesh_v)[idx_mesh_v]  # V, 25, 3
                 # calculate the distances neighbors [V, 25, 3]; new_mesh_v [V, 3] -> [V, 25]
-                neighbor_dists = torch.norm(neighbors - torch.tensor(new_mesh_v)[:, None], dim=-1)
                 neighbor_dists[neighbor_dists > 0.06] = 114514.
                 neighbor_weights = torch.exp(-neighbor_dists * 1.)
                 neighbor_weights = neighbor_weights / neighbor_weights.sum(dim=1, keepdim=True)
                 anchors = fixed_v[idx_anchor]  # V, 3
                 anchor_normals = calc_vertex_normals(fixed_v, fixed_f)[idx_anchor]  # V, 3
-                dis_anchor = torch.clamp(((anchors - torch.tensor(new_mesh_v)) * anchor_normals).sum(-1), min=0) + 0.01
                 vec_anchor = dis_anchor[:, None] * anchor_normals  # V, 3
                 vec_anchor = vec_anchor[idx_mesh_v]  # V, 25, 3
                 weighted_vec_anchor = (vec_anchor * neighbor_weights[:, :, None]).sum(1)  # V, 3
                 new_mesh_v += weighted_vec_anchor.cpu().numpy()
                 # replace new_mesh verts with new_mesh_v
-                new_mesh = Meshes(verts=[torch.tensor(new_mesh_v)], faces=new_mesh.faces_list(), textures=new_mesh.textures)
         except Exception as e:
             pass
-        notsimp_v, notsimp_f, notsimp_t = new_mesh.verts_packed(), new_mesh.faces_packed(), new_mesh.textures.verts_features_packed()
         if fixed_v is None:
             fixed_v, fixed_f = simp_v, simp_f
-            complete_v, complete_f, complete_t = notsimp_v, notsimp_f, notsimp_t
         else:
             fixed_f = torch.cat([fixed_f, simp_f + fixed_v.shape[0]], dim=0)
             fixed_v = torch.cat([fixed_v, simp_v], dim=0)
-            complete_f = torch.cat([complete_f, notsimp_f + complete_v.shape[0]], dim=0)
-            complete_v = torch.cat([complete_v, notsimp_v], dim=0)
-            complete_t = torch.cat([complete_t, notsimp_t], dim=0)
         if level == 2:
-            new_mesh = Meshes(verts=[new_mesh.verts_packed()], faces=[new_mesh.faces_packed()], textures=pytorch3d.renderer.mesh.textures.TexturesVertex(verts_features=[torch.ones_like(new_mesh.textures.verts_features_packed(), device=new_mesh.verts_packed().device)*0.5]))
-        save_py3dmesh_with_trimesh_fast(new_mesh, meshes[name_idx].replace('.obj', '_refined.obj'), apply_sRGB_to_LinearRGB=False)
-        results.append(meshes[name_idx].replace('.obj', '_refined.obj'))
     # save whole mesh
-    save_py3dmesh_with_trimesh_fast(Meshes(verts=[complete_v], faces=[complete_f], textures=pytorch3d.renderer.mesh.textures.TexturesVertex(verts_features=[complete_t])), meshes[name_idx].replace('.obj', '_refined_whole.obj'), apply_sRGB_to_LinearRGB=False)
-    results.append(meshes[name_idx].replace('.obj', '_refined_whole.obj'))
     return results
@@ -749,7 +727,7 @@ def infer_slrm_make3d(images):
     return mesh_glb_fpaths
 @spaces.GPU
-def infer_slrm_make_mesh(mesh_fpath, planes, level=None):
     mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
     mesh_dirname = os.path.dirname(mesh_fpath)
@@ -757,19 +735,36 @@ def infer_slrm_make_mesh(mesh_fpath, planes, level=None):
         # get mesh
         mesh_out = infer_slrm_model.extract_mesh(
             planes,
-            use_texture_map=False,
             levels=torch.tensor([level]).to(device),
             **infer_slrm_infer_config,
         )
-        vertices, faces, vertex_colors = mesh_out
-        vertices = vertices[:, [1, 2, 0]]
-        if level == 2:
-            # fill all vertex_colors with 127
-            vertex_colors = np.ones_like(vertex_colors) * 127
-        save_obj(vertices, faces, vertex_colors, mesh_fpath)
     return mesh_fpath

 import numpy as np
 import torch
+from pygltflib import GLTF2, Material, PbrMetallicRoughness
 from diffusers import AutoencoderKL, DDIMScheduler
 from diffusers.utils import check_min_version
     FOV_to_intrinsics,
     get_circular_camera_poses,
 )
+from slrm.utils.mesh_util import save_obj, save_glb, save_obj_with_mtl
 from slrm.utils.infer_util import images_to_video
 import cv2
 @spaces.GPU
+def get_distract_mask(color_0, color_1, normal_0=None, normal_1=None, thres=0.25, ratio=0.50, outside_thres=0.10, outside_ratio=0.20):
     distract_area = np.abs(color_0 - color_1).sum(axis=-1) > thres
     if normal_0 is not None and normal_1 is not None:
         distract_area |= np.abs(normal_0 - normal_1).sum(axis=-1) > thres
         max_x, max_y = bbox.max(axis=0)
         distract_bbox[min_x:max_x, min_y:max_y] = 1
+    return distract_mask, distract_bbox
 # infer_refine_sam = sam_model_registry["vit_h"](checkpoint="./ckpt/sam_vit_h_4b8939.pth").cuda()
     distract_mask = None
     results = []
+    mesh_list = []
     for name_idx, level in zip([2, 0, 1], [2, 1, 0]):
         mesh = trimesh.load(meshes[name_idx])
             colors.append(color)
             normals.append(normal)
+        if last_front_color is not None and level == 0:
+            distract_mask, distract_bbox = get_distract_mask(last_front_color, np.array(colors[0]).astype(np.float32) / 255.0)
+        else:
+            distract_mask = None
+            distract_bbox = None
         if last_colors is None:
             from copy import deepcopy
             _, idx_anchor = kdtree_anchor.query(mesh_v, k=1)
             _, idx_mesh_v = kdtree_mesh_v.query(mesh_v, k=25)
             idx_anchor = idx_anchor.squeeze()
+            neighbors = torch.tensor(mesh_v).cuda()[idx_mesh_v]  # V, 25, 3
             # calculate the distances neighbors [V, 25, 3]; mesh_v [V, 3] -> [V, 25]
+            neighbor_dists = torch.norm(neighbors - torch.tensor(mesh_v).cuda()[:, None], dim=-1)
             neighbor_dists[neighbor_dists > 0.06] = 114514.
             neighbor_weights = torch.exp(-neighbor_dists * 1.)
             neighbor_weights = neighbor_weights / neighbor_weights.sum(dim=1, keepdim=True)
             anchors = fixed_v[idx_anchor]  # V, 3
             anchor_normals = calc_vertex_normals(fixed_v, fixed_f)[idx_anchor]  # V, 3
+            dis_anchor = torch.clamp(((anchors - torch.tensor(mesh_v).cuda()) * anchor_normals).sum(-1), min=0) + 0.01
             vec_anchor = dis_anchor[:, None] * anchor_normals  # V, 3
             vec_anchor = vec_anchor[idx_mesh_v]  # V, 25, 3
             weighted_vec_anchor = (vec_anchor * neighbor_weights[:, :, None]).sum(1)  # V, 3
         # my mesh flow weight by nearest vertexs
         try:
             if fixed_v is not None and fixed_f is not None and level != 0:
+                new_mesh_v = new_mesh.vertices.copy()
                 fixed_v_cpu = fixed_v.cpu().numpy()
                 kdtree_anchor = KDTree(fixed_v_cpu)
                 _, idx_anchor = kdtree_anchor.query(new_mesh_v, k=1)
                 _, idx_mesh_v = kdtree_mesh_v.query(new_mesh_v, k=25)
                 idx_anchor = idx_anchor.squeeze()
+                neighbors = torch.tensor(new_mesh_v).cuda()[idx_mesh_v]  # V, 25, 3
                 # calculate the distances neighbors [V, 25, 3]; new_mesh_v [V, 3] -> [V, 25]
+                neighbor_dists = torch.norm(neighbors - torch.tensor(new_mesh_v).cuda()[:, None], dim=-1)
                 neighbor_dists[neighbor_dists > 0.06] = 114514.
                 neighbor_weights = torch.exp(-neighbor_dists * 1.)
                 neighbor_weights = neighbor_weights / neighbor_weights.sum(dim=1, keepdim=True)
                 anchors = fixed_v[idx_anchor]  # V, 3
                 anchor_normals = calc_vertex_normals(fixed_v, fixed_f)[idx_anchor]  # V, 3
+                dis_anchor = torch.clamp(((anchors - torch.tensor(new_mesh_v).cuda()) * anchor_normals).sum(-1), min=0) + 0.01
                 vec_anchor = dis_anchor[:, None] * anchor_normals  # V, 3
                 vec_anchor = vec_anchor[idx_mesh_v]  # V, 25, 3
                 weighted_vec_anchor = (vec_anchor * neighbor_weights[:, :, None]).sum(1)  # V, 3
                 new_mesh_v += weighted_vec_anchor.cpu().numpy()
                 # replace new_mesh verts with new_mesh_v
+                new_mesh.vertices = new_mesh_v
         except Exception as e:
             pass
         if fixed_v is None:
             fixed_v, fixed_f = simp_v, simp_f
         else:
             fixed_f = torch.cat([fixed_f, simp_f + fixed_v.shape[0]], dim=0)
             fixed_v = torch.cat([fixed_v, simp_v], dim=0)
+        mesh_list.append(new_mesh)
         if level == 2:
+            new_mesh = trimesh.Trimesh(simp_v.cpu().numpy(), simp_f.cpu().numpy(), process=False)
+        new_mesh.export(meshes[name_idx].replace('.obj', '_refined.glb'))
+        results.append(meshes[name_idx].replace('.obj', '_refined.glb'))
+        gltf = GLTF2().load(meshes[name_idx].replace('.obj', '_refined.glb'))
+        for material in gltf.materials:
+            if material.pbrMetallicRoughness:
+                material.pbrMetallicRoughness.baseColorFactor = [1.0, 1.0, 1.0, 100.0]
+                material.pbrMetallicRoughness.metallicFactor = 0.0
+                material.pbrMetallicRoughness.roughnessFactor = 1.0
+        gltf.save(meshes[name_idx].replace('.obj', '_refined.glb'))
     # save whole mesh
+    scene = trimesh.Scene(mesh_list)
+    scene.export(meshes[name_idx].replace('.obj', '_refined_whole.glb'))
+    results.append(meshes[name_idx].replace('.obj', '_refined_whole.glb'))
+    gltf = GLTF2().load(meshes[name_idx].replace('.obj', '_refined_whole.glb'))
+    for material in gltf.materials:
+        if material.pbrMetallicRoughness:
+            material.pbrMetallicRoughness.baseColorFactor = [1.0, 1.0, 1.0, 100.0]
+            material.pbrMetallicRoughness.metallicFactor = 0.0
+            material.pbrMetallicRoughness.roughnessFactor = 1.0
+    gltf.save(meshes[name_idx].replace('.obj', '_refined_whole.glb'))
     return results
     return mesh_glb_fpaths
 @spaces.GPU
+def infer_slrm_make_mesh(mesh_fpath, planes, level=None, use_texture_map=False):
     mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
     mesh_dirname = os.path.dirname(mesh_fpath)
         # get mesh
         mesh_out = infer_slrm_model.extract_mesh(
             planes,
+            use_texture_map=use_texture_map,
             levels=torch.tensor([level]).to(device),
             **infer_slrm_infer_config,
         )
+        if use_texture_map:
+            vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
+            vertices = vertices[:, [1, 2, 0]]
+            tex_map = tex_map.permute(1, 2, 0).data.cpu().numpy()
+            if level == 2:
+                # fill all vertex_colors with 127
+                tex_map = np.ones_like(tex_map) * 127
+            save_obj_with_mtl(
+                vertices.data.cpu().numpy(),
+                uvs.data.cpu().numpy(),
+                faces.data.cpu().numpy(),
+                mesh_tex_idx.data.cpu().numpy(),
+                tex_map,
+                mesh_fpath
+            )
+        else:
+            vertices, faces, vertex_colors = mesh_out
+            vertices = vertices[:, [1, 2, 0]]
+            if level == 2:
+                # fill all vertex_colors with 127
+                vertex_colors = np.ones_like(vertex_colors) * 127
+            save_obj(vertices, faces, vertex_colors, mesh_fpath)
     return mesh_fpath

refine/mesh_refine.py CHANGED Viewed

@@ -13,6 +13,104 @@ from refine.render import NormalsRenderer, calc_vertex_normals
 import pytorch3d
 from pytorch3d.structures import Meshes
 def remove_color(arr):
     if arr.shape[-1] == 4:
@@ -301,11 +399,11 @@ def geo_refine_1(mesh_v, mesh_f, rgb_ls, normal_ls, expansion_weight=0.1, fixed_
         return mesh_v, mesh_f
     vertices, faces = reconstruct_stage1(rm_normals, steps=200, vertices=mesh_v, faces=mesh_f, fixed_v=fixed_v, fixed_f=fixed_f,
-                                         lr=stage1_lr, remesh_interval=stage1_remesh_interval, start_edge_len=0.02,
-                                         end_edge_len=0.005, gain=0.05, loss_expansion_weight=expansion_weight,
                                          distract_mask=distract_mask, distract_bbox=distract_bbox)
-    vertices, faces = run_mesh_refine(vertices, faces, rm_normals, fixed_v=fixed_v, fixed_f=fixed_f, steps=100, start_edge_len=0.005, end_edge_len=0.0002,
                                       decay=0.99, update_normal_interval=20, update_warmup=5, process_inputs=False, process_outputs=False, remesh_interval=1)
     return vertices, faces
@@ -314,21 +412,77 @@ def geo_refine_2(vertices, faces, fixed_v=None):
     simp_vertices, simp_faces = meshes.verts_packed(), meshes.faces_packed()
     vertices, faces = simp_vertices.detach().cpu().numpy(), simp_faces.detach().cpu().numpy()
     # vertices, faces = trimesh.remesh.subdivide(vertices, faces)
-    if fixed_v is not None:
-        vertices, faces = trimesh.remesh.subdivide(vertices, faces)
     return vertices, faces
-def geo_refine_3(vertices, faces, rgb_ls, fixed_v=None, fixed_f=None, distract_mask=None):
-    origin_len_v, origin_len_f = len(vertices), len(faces)
     # concatenate fixed_v and fixed_f
     if fixed_v is not None and fixed_f is not None:
-        vertices, faces = np.concatenate([vertices, fixed_v.detach().cpu().numpy()], axis=0), np.concatenate([faces, fixed_f.detach().cpu().numpy() + len(vertices)], axis=0)
-    vertices, faces = torch.tensor(vertices, device='cuda'), torch.tensor(faces, device='cuda')
     # reconstruct meshes
-    meshes = Meshes(verts=[vertices], faces=[faces], textures=pytorch3d.renderer.mesh.textures.TexturesVertex([torch.zeros_like(vertices).float()]))
     new_meshes = multiview_color_projection(meshes, rgb_ls, resolution=1024, device="cuda", complete_unseen=True, confidence_threshold=0.2, cameras_list = get_cameras_list([180, 225, 270, 0, 90, 135], "cuda", focal=1/1.2), weights=[2.0, 0.5, 0.0, 1.0, 0.0, 0.5] if distract_mask is None else [2.0, 0.0, 0.5, 1.0, 0.5, 0.0], distract_mask=distract_mask)
-    # exclude fixed_v and fixed_f
     if fixed_v is not None and fixed_f is not None:
-        new_meshes = Meshes(verts=[new_meshes.verts_packed()[:origin_len_v]], faces=[new_meshes.faces_packed()[:origin_len_f]],
-                            textures=pytorch3d.renderer.mesh.textures.TexturesVertex([new_meshes.textures.verts_features_packed()[:origin_len_v]]))
-    return new_meshes.to("cpu"), vertices.cpu(), faces.cpu()

 import pytorch3d
 from pytorch3d.structures import Meshes
+import xatlas
+import cv2
+def mesh_uv_wrap(vertices, faces):
+    """Unwrap a mesh into a UV atlas with xatlas.
+
+    Returns a 3-tuple: ``vertices`` re-indexed by the vertex mapping that
+    ``xatlas.parametrize`` produces, the face index array it returns, and
+    the UV coordinates it returns.
+
+    Raises:
+        ValueError: if the mesh has more than 50,000 faces.
+    """
+    face_count = len(faces)
+    if face_count > 50000:
+        raise ValueError("The mesh has more than 50,000 faces, which is not supported.")
+    atlas = xatlas.parametrize(vertices, faces)
+    vertex_map, atlas_faces, atlas_uvs = atlas
+    return vertices[vertex_map], atlas_faces, atlas_uvs
+def stride_from_shape(shape):
+    """Return the row-major element strides for ``shape`` as a list of ints.
+
+    The last dimension gets stride 1; every earlier dimension gets the
+    product of all later dimension sizes. An empty or one-element shape
+    yields ``[1]``.
+    """
+    strides = [1]
+    # Walk the trailing dimensions from last to second, growing the list at the front.
+    for dim in reversed(shape[1:]):
+        strides.insert(0, strides[0] * dim)
+    return strides
+def scatter_add_nd_with_count(input, count, indices, values, weights=None):
+    """Scatter-add ``values`` into an N-d grid and accumulate per-cell weights.
+
+    Args:
+        input: [..., C] accumulation grid (D grid dimensions plus C channels).
+        count: [..., 1] weight accumulator over the same D grid dimensions.
+        indices: [N, D] long grid coordinates, one row per value.
+        values: [N, C] values to add at ``indices``.
+        weights: optional [N, 1] weights added into ``count``; defaults to
+            ones shaped like ``values[..., :1]``.
+
+    Returns:
+        ``(input, count)`` viewed back to ``[..., C]`` and ``[..., 1]``.
+        Both are modified through flattened views using in-place
+        ``scatter_add_``.
+    """
+    grid_shape = input.shape[:-1]
+    channels = input.shape[-1]
+    grid_strides = stride_from_shape(grid_shape)
+    assert len(grid_shape) == indices.shape[-1]
+    flat_input = input.view(-1, channels)  # [HW, C]
+    flat_count = count.view(-1, 1)
+    # Collapse each N-d coordinate into a single offset into the flattened grid.
+    stride_vec = torch.tensor(grid_strides, dtype=torch.long, device=indices.device)
+    linear_idx = (indices * stride_vec).sum(-1).unsqueeze(1)  # [N, 1]
+    if weights is None:
+        weights = torch.ones_like(values[..., :1])
+    flat_input.scatter_add_(0, linear_idx.repeat(1, channels), values)
+    flat_count.scatter_add_(0, linear_idx, weights)
+    return flat_input.view(*grid_shape, channels), flat_count.view(*grid_shape, 1)
+def linear_grid_put_2d(H, W, coords, values, return_count=False):
+    """Splat ``values`` onto an ``H x W`` grid with bilinear weights.
+
+    Args:
+        H, W: grid height and width.
+        coords: [N, 2] float coordinates in [0, 1]; scaled by ``(H - 1, W - 1)``.
+        values: [N, C] values to distribute over the four surrounding cells.
+        return_count: when true, return the raw accumulators.
+
+    Returns:
+        ``(result, count)`` un-normalised when ``return_count`` is true;
+        otherwise ``(result, empty_mask)``, where ``result`` is divided by
+        the accumulated weight wherever that weight is positive and
+        ``empty_mask`` is a boolean [H, W] map of cells with zero weight.
+    """
+    C = values.shape[-1]
+    scale = torch.tensor([H - 1, W - 1], dtype=torch.float32, device=coords.device)
+    indices = coords * scale
+    # Top-left corner of each sample's cell, clamped so the +1 neighbours stay in range.
+    base = indices.floor().long()  # [N, 2]
+    base[:, 0].clamp_(0, H - 2)
+    base[:, 1].clamp_(0, W - 2)
+    frac_h = indices[..., 0] - base[..., 0].float()
+    frac_w = indices[..., 1] - base[..., 1].float()
+    result = torch.zeros(H, W, C, device=values.device, dtype=values.dtype)  # [H, W, C]
+    count = torch.zeros(H, W, 1, device=values.device, dtype=values.dtype)  # [H, W, 1]
+    weights = torch.ones_like(values[..., :1])  # [N, 1]
+    # Corner offsets paired with their bilinear weights, in 00, 01, 10, 11 order.
+    corners = (
+        ((0, 0), (1 - frac_h) * (1 - frac_w)),
+        ((0, 1), (1 - frac_h) * frac_w),
+        ((1, 0), frac_h * (1 - frac_w)),
+        ((1, 1), frac_h * frac_w),
+    )
+    for offset, corner_weight in corners:
+        corner_idx = base + torch.tensor(offset, dtype=torch.long, device=indices.device)
+        corner_weight = corner_weight.unsqueeze(1)
+        result, count = scatter_add_nd_with_count(
+            result, count, corner_idx, values * corner_weight, weights * corner_weight)
+    if return_count:
+        return result, count
+    filled = count.squeeze(-1) > 0
+    result[filled] = result[filled] / count[filled].repeat(1, C)
+    return result, count.squeeze(-1) == 0
 def remove_color(arr):
     if arr.shape[-1] == 4:
         return mesh_v, mesh_f
     vertices, faces = reconstruct_stage1(rm_normals, steps=200, vertices=mesh_v, faces=mesh_f, fixed_v=fixed_v, fixed_f=fixed_f,
+                                         lr=stage1_lr, remesh_interval=stage1_remesh_interval, start_edge_len=0.04,
+                                         end_edge_len=0.02, gain=0.05, loss_expansion_weight=expansion_weight,
                                          distract_mask=distract_mask, distract_bbox=distract_bbox)
+    vertices, faces = run_mesh_refine(vertices, faces, rm_normals, fixed_v=fixed_v, fixed_f=fixed_f, steps=100, start_edge_len=0.02, end_edge_len=0.001,
                                       decay=0.99, update_normal_interval=20, update_warmup=5, process_inputs=False, process_outputs=False, remesh_interval=1)
     return vertices, faces
     simp_vertices, simp_faces = meshes.verts_packed(), meshes.faces_packed()
     vertices, faces = simp_vertices.detach().cpu().numpy(), simp_faces.detach().cpu().numpy()
     # vertices, faces = trimesh.remesh.subdivide(vertices, faces)
     return vertices, faces
+def geo_refine_3(vertices_, faces_, rgb_ls, fixed_v=None, fixed_f=None, distract_mask=None):
+    """Project multi-view colours onto a mesh and bake them into a UV texture.
+
+    Steps:
+      1. UV-unwrap the input mesh with ``xatlas.parametrize``.
+      2. Subdivide both the atlas mesh (positions and UVs together) and the
+         original mesh twice.
+      3. Optionally append the fixed geometry (``fixed_v`` / ``fixed_f``) to
+         the dense mesh, run ``multiview_color_projection`` on the combined
+         mesh, then drop the fixed part again.
+      4. Give each dense atlas vertex the colour of its nearest dense vertex,
+         splat those colours into a 1024x1024 texture and inpaint empty
+         texels.
+
+    Args:
+        vertices_, faces_: input mesh arrays (assumed NumPy, as they are
+            indexed and passed to xatlas / trimesh — TODO confirm dtypes).
+        rgb_ls: images forwarded to ``multiview_color_projection``.
+        fixed_v, fixed_f: optional torch tensors holding already-fixed
+            geometry; used only when both are given.
+        distract_mask: forwarded to ``multiview_color_projection``; also
+            selects which set of view weights is used.
+
+    Returns:
+        ``(mesh, vertices, faces)``: a ``trimesh.Trimesh`` of the atlas mesh
+        with texture visuals, plus the atlas vertices and int64 faces as
+        CUDA tensors.
+    """
+    vmapping, indices, uvs = xatlas.parametrize(vertices_, faces_)
+    vertices, faces = vertices_[vmapping], indices
+    def subdivide(vertices, faces, uvs):
+        # Carry the UVs as extra vertex columns so they are subdivided with the positions.
+        vertices, faces = trimesh.remesh.subdivide(
+            vertices=np.hstack((vertices, uvs.copy())),
+            faces=faces
+        )
+        return vertices[:, :3], faces, vertices[:, 3:]
+    # Two subdivision rounds for the atlas mesh and for the original mesh; the
+    # same densification is applied whether or not fixed geometry is supplied.
+    dense_atlas_vertices, dense_atlas_faces, dense_atlas_uvs = subdivide(vertices, faces, uvs)
+    dense_atlas_vertices, dense_atlas_faces, dense_atlas_uvs = subdivide(dense_atlas_vertices, dense_atlas_faces, dense_atlas_uvs)
+    dense_vertices, dense_faces = trimesh.remesh.subdivide(vertices_, faces_)
+    dense_vertices, dense_faces = trimesh.remesh.subdivide(dense_vertices, dense_faces)
+    origin_len_v, origin_len_f = len(dense_vertices), len(dense_faces)
     # concatenate fixed_v and fixed_f
     if fixed_v is not None and fixed_f is not None:
+        # fixed_f indexes into fixed_v, which is appended after the dense vertices,
+        # so its indices must be shifted by the dense vertex count.
+        dense_vertices, dense_faces = np.concatenate([dense_vertices, fixed_v.detach().cpu().numpy()], axis=0), np.concatenate([dense_faces, fixed_f.detach().cpu().numpy() + origin_len_v], axis=0)
+    dense_vertices, dense_faces = torch.from_numpy(dense_vertices).cuda(), torch.from_numpy(dense_faces.astype('int32')).cuda()
     # reconstruct meshes
+    meshes = Meshes(verts=[dense_vertices], faces=[dense_faces], textures=pytorch3d.renderer.mesh.textures.TexturesVertex([torch.zeros_like(dense_vertices).float()]))
     new_meshes = multiview_color_projection(meshes, rgb_ls, resolution=1024, device="cuda", complete_unseen=True, confidence_threshold=0.2, cameras_list = get_cameras_list([180, 225, 270, 0, 90, 135], "cuda", focal=1/1.2), weights=[2.0, 0.5, 0.0, 1.0, 0.0, 0.5] if distract_mask is None else [2.0, 0.0, 0.5, 1.0, 0.5, 0.0], distract_mask=distract_mask)
     if fixed_v is not None and fixed_f is not None:
+        # exclude the appended fixed geometry again
+        dense_vertices = dense_vertices[:origin_len_v]
+        dense_faces = dense_faces[:origin_len_f]
+        textures = new_meshes.textures.verts_features_packed()[:origin_len_v]
+    else:
+        textures = new_meshes.textures.verts_features_packed()
+    # Nearest-neighbour colour lookup, chunked so the full pairwise distance
+    # matrix is never materialised at once.
+    chunk_size = 500
+    atlas_textures_chunks = []
+    for i in range(0, len(dense_atlas_vertices), chunk_size):
+        chunk = dense_atlas_vertices[i:i+chunk_size]
+        # dense_vertices is already a CUDA tensor, so it is used directly.
+        distances = torch.cdist(torch.tensor(chunk).cuda(), dense_vertices)
+        nearest_indices = torch.argmin(distances, dim=1)
+        atlas_textures_chunks.append(textures[nearest_indices])
+    atlas_textures = torch.cat(atlas_textures_chunks, dim=0)
+    dense_atlas_uvs = torch.tensor(dense_atlas_uvs, dtype=torch.float32).cuda()
+    tex_img, mask = linear_grid_put_2d(1024, 1024, dense_atlas_uvs, atlas_textures)
+    tex_img, mask = tex_img.cpu().numpy(), mask.cpu().numpy()
+    # Fill texels that received no colour.
+    tex_img = cv2.inpaint((tex_img * 255).astype(np.uint8), (mask*255).astype('uint8'), 3, cv2.INPAINT_NS)
+    # Swap the two grid axes and flip vertically before building the image.
+    tex_img = Image.fromarray(np.transpose(tex_img,(1,0,2))[::-1])
+    mesh = trimesh.Trimesh(vertices, faces, process=False)
+    material = trimesh.visual.material.PBRMaterial(
+        roughnessFactor=1.0,
+        baseColorTexture=tex_img,
+        baseColorFactor=np.array([255, 255, 255, 255], dtype=np.uint8)
+    )
+    texture_visuals = trimesh.visual.TextureVisuals(uv=uvs, image=tex_img, material=material)
+    mesh.visual = texture_visuals
+    return mesh, torch.tensor(vertices).cuda(), torch.tensor(faces.astype('int64')).cuda()

slrm/models/lrm_mesh.py CHANGED Viewed

@@ -116,13 +116,13 @@ class MeshSLRM(nn.Module):
             camera = OrthogonalCamera(device=device)
         with torch.cuda.amp.autocast(enabled=False):
-            # renderer = NeuralRender(device, camera_model=camera)
             self.geometry = FlexiCubesGeometry(
                 grid_res_xy=self.grid_res_xy,
                 grid_res_z=self.grid_res_z,
                 scale_xy=self.grid_scale_xy,
                 scale_z=self.grid_scale_z,
-                renderer=None,
                 render_type='neural_render',
                 device=device,
             )

             camera = OrthogonalCamera(device=device)
         with torch.cuda.amp.autocast(enabled=False):
+            renderer = NeuralRender(device, camera_model=camera)
             self.geometry = FlexiCubesGeometry(
                 grid_res_xy=self.grid_res_xy,
                 grid_res_z=self.grid_res_z,
                 scale_xy=self.grid_scale_xy,
                 scale_z=self.grid_scale_z,
+                renderer=renderer,
                 render_type='neural_render',
                 device=device,
             )