jadechoghari
/

vfusion3d

@@ -82,55 +82,57 @@ class LRMGenerator(PreTrainedModel):
         assert camera_embeddings.shape[-1] == self.camera_embed_dim, \
             f"Feature dimension mismatch: {camera_embeddings.shape[-1]} vs {self.camera_embed_dim}"
-        # transformer generating planes
-        planes = self.transformer(image_feats, camera_embeddings)
-        assert planes.shape[0] == N, "Batch size mismatch for planes"
-        assert planes.shape[1] == 3, "Planes should have 3 channels"
-        # Generate the mesh
-        if export_mesh:
-          import mcubes
-          import trimesh
-          grid_out = self.synthesizer.forward_grid(planes=planes, grid_size=mesh_size)
-          vtx, faces = mcubes.marching_cubes(grid_out['sigma'].float().squeeze(0).squeeze(-1).cpu().numpy(), 1.0)
-          vtx = vtx / (mesh_size - 1) * 2 - 1
-          vtx_tensor = torch.tensor(vtx, dtype=torch.float32, device=image.device).unsqueeze(0)
-          vtx_colors = self.synthesizer.forward_points(planes, vtx_tensor)['rgb'].float().squeeze(0).cpu().numpy()
-          vtx_colors = (vtx_colors * 255).astype(np.uint8)
-          mesh = trimesh.Trimesh(vertices=vtx, faces=faces, vertex_colors=vtx_colors)
-          mesh_path = "awesome_mesh.obj"
-          mesh.export(mesh_path, 'obj')
-          return planes, mesh_path
-        # Generate video
-        if export_video:
-            render_cameras = self._default_render_cameras(batch_size=N).to(image.device)
-            frames = []
-            chunk_size = 1  # Adjust chunk size as needed
-            for i in range(0, render_cameras.shape[1], chunk_size):
-                frame_chunk = self.synthesizer(
-                    planes,
-                    render_cameras[:, i:i + chunk_size],
-                    render_size,
-                    render_size,
-                    0,
-                    0
-                )
-                frames.append(frame_chunk['images_rgb'])
-            frames = torch.cat(frames, dim=1)
-            frames = (frames.permute(0, 2, 3, 1).cpu().numpy() * 255).astype(np.uint8)
-            # Save video
-            video_path = "awesome_video.mp4"
-            imageio.mimwrite(video_path, frames, fps=fps)
-            return planes, video_path
-        return planes
     # Copied from https://github.com/facebookresearch/vfusion3d/blob/main/lrm/cam_utils.py
     # and https://github.com/facebookresearch/vfusion3d/blob/main/lrm/inferrer.py

         assert camera_embeddings.shape[-1] == self.camera_embed_dim, \
             f"Feature dimension mismatch: {camera_embeddings.shape[-1]} vs {self.camera_embed_dim}"
+        with torch.no_grad():
+            # transformer generating planes
+            planes = self.transformer(image_feats, camera_embeddings)
+            assert planes.shape[0] == N, "Batch size mismatch for planes"
+            assert planes.shape[1] == 3, "Planes should have 3 channels"
+            # Generate the mesh
+            if export_mesh:
+              import mcubes
+              import trimesh
+              grid_out = self.synthesizer.forward_grid(planes=planes, grid_size=mesh_size)
+              vtx, faces = mcubes.marching_cubes(grid_out['sigma'].float().squeeze(0).squeeze(-1).cpu().numpy(), 1.0)
+              vtx = vtx / (mesh_size - 1) * 2 - 1
+              vtx_tensor = torch.tensor(vtx, dtype=torch.float32, device=image.device).unsqueeze(0)
+              vtx_colors = self.synthesizer.forward_points(planes, vtx_tensor)['rgb'].float().squeeze(0).cpu().numpy()
+              vtx_colors = (vtx_colors * 255).astype(np.uint8)
+              mesh = trimesh.Trimesh(vertices=vtx, faces=faces, vertex_colors=vtx_colors)
+              mesh_path = "awesome_mesh.obj"
+              mesh.export(mesh_path, 'obj')
+              return planes, mesh_path
+            # Generate video
+            if export_video:
+                render_cameras = self._default_render_cameras(batch_size=N).to(image.device)
+                frames = []
+                chunk_size = 1  # Adjust chunk size as needed
+                for i in range(0, render_cameras.shape[1], chunk_size):
+                    frame_chunk = self.synthesizer(
+                        planes,
+                        render_cameras[:, i:i + chunk_size],
+                        render_size,
+                        render_size,
+                        0,
+                        0
+                    )
+                    frames.append(frame_chunk['images_rgb'])
+                frames = torch.cat(frames, dim=1)
+                frames = (frames.permute(0, 2, 3, 1).cpu().numpy() * 255).astype(np.uint8)
+                # Save video
+                video_path = "awesome_video.mp4"
+                imageio.mimwrite(video_path, frames, fps=fps)
+                return planes, video_path
+            return planes
     # Copied from https://github.com/facebookresearch/vfusion3d/blob/main/lrm/cam_utils.py
     # and https://github.com/facebookresearch/vfusion3d/blob/main/lrm/inferrer.py