ZeqiangLai committed
Commit 8839214 · verified · 1 Parent(s): 79aad46

Update hy3dgen/shapegen/pipelines.py

Files changed (1)
  1. hy3dgen/shapegen/pipelines.py +169 -34
hy3dgen/shapegen/pipelines.py CHANGED
@@ -34,11 +34,12 @@ import trimesh
 import yaml
 from PIL import Image
 from diffusers.utils.torch_utils import randn_tensor
+from diffusers.utils.import_utils import is_accelerate_version, is_accelerate_available
 from tqdm import tqdm
 
 from .models.autoencoders import ShapeVAE
 from .models.autoencoders import SurfaceExtractors
-from .utils import logger, synchronize_timer
+from .utils import logger, synchronize_timer, smart_load_model
 
 
 def retrieve_timesteps(
@@ -137,6 +138,9 @@ def instantiate_from_config(config, **kwargs):
 
 
 class Hunyuan3DDiTPipeline:
+    model_cpu_offload_seq = "conditioner->model->vae"
+    _exclude_from_cpu_offload = []
+
     @classmethod
     @synchronize_timer('Hunyuan3DDiTPipeline Model Loading')
     def from_single_file(
@@ -217,34 +221,12 @@ class Hunyuan3DDiTPipeline:
             dtype=dtype,
             device=device,
         )
-        original_model_path = model_path
-        # try local path
-        base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
-        model_path = os.path.expanduser(os.path.join(base_dir, model_path, subfolder))
-        logger.info(f'Try to load model from local path: {model_path}')
-        if not os.path.exists(model_path):
-            logger.info('Model path not exists, try to download from huggingface')
-            try:
-                import huggingface_hub
-                # download from huggingface
-                path = huggingface_hub.snapshot_download(repo_id=original_model_path)
-                model_path = os.path.join(path, subfolder)
-            except ImportError:
-                logger.warning(
-                    "You need to install HuggingFace Hub to load models from the hub."
-                )
-                raise RuntimeError(f"Model path {model_path} not found")
-            except Exception as e:
-                raise e
-
-        if not os.path.exists(model_path):
-            raise FileNotFoundError(f"Model path {original_model_path} not found")
-
-        extension = 'ckpt' if not use_safetensors else 'safetensors'
-        variant = '' if variant is None else f'.{variant}'
-        ckpt_name = f'model{variant}.{extension}'
-        config_path = os.path.join(model_path, 'config.yaml')
-        ckpt_path = os.path.join(model_path, ckpt_name)
+        config_path, ckpt_path = smart_load_model(
+            model_path,
+            subfolder=subfolder,
+            use_safetensors=use_safetensors,
+            variant=variant
+        )
         return cls.from_single_file(
             ckpt_path,
             config_path,
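
The hunk above folds the local-path lookup, Hugging Face Hub fallback, and checkpoint-name resolution into a single `smart_load_model` helper imported from `.utils`. That helper's body is not part of this diff; the following is only a sketch of what it presumably does, reconstructed from the inline logic removed above (the signature mirrors the call site; details such as logging may differ in the real helper).

# Hypothetical reconstruction of smart_load_model (hy3dgen/shapegen/utils.py),
# inferred from the inline logic removed in the hunk above; not part of this commit.
import os

def smart_load_model(model_path, subfolder, use_safetensors, variant):
    original_model_path = model_path
    # Prefer a local copy under HY3DGEN_MODELS (default: ~/.cache/hy3dgen).
    base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
    local_dir = os.path.expanduser(os.path.join(base_dir, model_path, subfolder))
    if not os.path.exists(local_dir):
        # Fall back to downloading the repository snapshot from the Hugging Face Hub.
        import huggingface_hub
        snapshot = huggingface_hub.snapshot_download(repo_id=original_model_path)
        local_dir = os.path.join(snapshot, subfolder)
    if not os.path.exists(local_dir):
        raise FileNotFoundError(f"Model path {original_model_path} not found")
    # Resolve config.yaml plus model[.variant].{ckpt,safetensors} inside that folder.
    extension = 'safetensors' if use_safetensors else 'ckpt'
    suffix = '' if variant is None else f'.{variant}'
    config_path = os.path.join(local_dir, 'config.yaml')
    ckpt_path = os.path.join(local_dir, f'model{suffix}.{extension}')
    return config_path, ckpt_path
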
@@ -278,17 +260,170 @@ class Hunyuan3DDiTPipeline:
         self.model = torch.compile(self.model)
         self.conditioner = torch.compile(self.conditioner)
 
+    def enable_flashvdm(
+        self,
+        enabled: bool = True,
+        adaptive_kv_selection=True,
+        topk_mode='mean',
+        mc_algo='dmc',
+        replace_vae=True,
+    ):
+        if enabled:
+            model_path = self.kwargs['from_pretrained_kwargs']['model_path']
+            turbo_vae_mapping = {
+                'Hunyuan3D-2': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0-turbo'),
+                'Hunyuan3D-2mv': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0-turbo'),
+                'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini-turbo'),
+            }
+            model_name = model_path.split('/')[-1]
+            if replace_vae and model_name in turbo_vae_mapping:
+                model_path, subfolder = turbo_vae_mapping[model_name]
+                self.vae = ShapeVAE.from_pretrained(
+                    model_path, subfolder=subfolder,
+                    use_safetensors=self.kwargs['from_pretrained_kwargs']['use_safetensors'],
+                    device=self.device,
+                )
+            self.vae.enable_flashvdm_decoder(
+                enabled=enabled,
+                adaptive_kv_selection=adaptive_kv_selection,
+                topk_mode=topk_mode,
+                mc_algo=mc_algo
+            )
+        else:
+            model_path = self.kwargs['from_pretrained_kwargs']['model_path']
+            vae_mapping = {
+                'Hunyuan3D-2': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0'),
+                'Hunyuan3D-2mv': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0'),
+                'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini'),
+            }
+            model_name = model_path.split('/')[-1]
+            if model_name in vae_mapping:
+                model_path, subfolder = vae_mapping[model_name]
+                self.vae = ShapeVAE.from_pretrained(model_path, subfolder=subfolder)
+            self.vae.enable_flashvdm_decoder(enabled=False)
+
     def to(self, device=None, dtype=None):
-        if device is not None:
-            self.device = torch.device(device)
-            self.vae.to(device)
-            self.model.to(device)
-            self.conditioner.to(device)
         if dtype is not None:
             self.dtype = dtype
             self.vae.to(dtype=dtype)
             self.model.to(dtype=dtype)
             self.conditioner.to(dtype=dtype)
+        if device is not None:
+            self.device = torch.device(device)
+            self.vae.to(device)
+            self.model.to(device)
+            self.conditioner.to(device)
+
+    @property
+    def _execution_device(self):
+        r"""
+        Returns the device on which the pipeline's models will be executed. After calling
+        [`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from
+        Accelerate's module hooks.
+        """
+        for name, model in self.components.items():
+            if not isinstance(model, torch.nn.Module) or name in self._exclude_from_cpu_offload:
+                continue
+
+            if not hasattr(model, "_hf_hook"):
+                return self.device
+            for module in model.modules():
+                if (
+                    hasattr(module, "_hf_hook")
+                    and hasattr(module._hf_hook, "execution_device")
+                    and module._hf_hook.execution_device is not None
+                ):
+                    return torch.device(module._hf_hook.execution_device)
+        return self.device
+
+    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+        r"""
+        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
+        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
+        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
+        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+
+        Arguments:
+            gpu_id (`int`, *optional*):
+                The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
+            device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
+                The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
+                default to "cuda".
+        """
+        if self.model_cpu_offload_seq is None:
+            raise ValueError(
+                "Model CPU offload cannot be enabled because no `model_cpu_offload_seq` class attribute is set."
+            )
+
+        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
+            from accelerate import cpu_offload_with_hook
+        else:
+            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
+
+        torch_device = torch.device(device)
+        device_index = torch_device.index
+
+        if gpu_id is not None and device_index is not None:
+            raise ValueError(
+                f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}"
+                f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}"
+            )
+
+        # _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0
+        self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0)
+
+        device_type = torch_device.type
+        device = torch.device(f"{device_type}:{self._offload_gpu_id}")
+
+        if self.device.type != "cpu":
+            self.to("cpu")
+            device_mod = getattr(torch, self.device.type, None)
+            if hasattr(device_mod, "empty_cache") and device_mod.is_available():
+                device_mod.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
+
+        all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)}
+
+        self._all_hooks = []
+        hook = None
+        for model_str in self.model_cpu_offload_seq.split("->"):
+            model = all_model_components.pop(model_str, None)
+            if not isinstance(model, torch.nn.Module):
+                continue
+
+            _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook)
+            self._all_hooks.append(hook)
+
+        # CPU offload models that are not in the seq chain unless they are explicitly excluded
+        # these models will stay on CPU until maybe_free_model_hooks is called
+        # some models cannot be in the seq chain because they are iteratively called, such as controlnet
+        for name, model in all_model_components.items():
+            if not isinstance(model, torch.nn.Module):
+                continue
+
+            if name in self._exclude_from_cpu_offload:
+                model.to(device)
+            else:
+                _, hook = cpu_offload_with_hook(model, device)
+                self._all_hooks.append(hook)
+
+    def maybe_free_model_hooks(self):
+        r"""
+        Function that offloads all components, removes all model hooks that were added when using
+        `enable_model_cpu_offload` and then applies them again. In case the model has not been offloaded this function
+        is a no-op. Make sure to add this function to the end of the `__call__` function of your pipeline so that it
+        functions correctly when applying enable_model_cpu_offload.
+        """
+        if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0:
+            # `enable_model_cpu_offload` has not be called, so silently do nothing
+            return
+
+        for hook in self._all_hooks:
+            # offload model and remove hook from model
+            hook.offload()
+            hook.remove()
+
+        # make sure the model is in the same state as before calling it
+        self.enable_model_cpu_offload()
 
     @synchronize_timer('Encode cond')
     def encode_cond(self, image, additional_cond_inputs, do_classifier_free_guidance, dual_guidance):
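
Taken together, the added methods give the pipeline opt-in diffusers-style memory management and a FlashVDM decoding path. A minimal usage sketch follows; the `Hunyuan3DDiTFlowMatchingPipeline` entry point, the `tencent/Hunyuan3D-2` repo id, and the demo image path are assumptions taken from the repository README rather than from this diff, and `enable_model_cpu_offload` requires accelerate >= 0.17.0.

# Usage sketch only; entry point and paths are assumed, not part of this commit.
from PIL import Image
from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline

pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained('tencent/Hunyuan3D-2')

# Sub-models stay on CPU and are moved to the GPU one at a time, in the order
# declared by model_cpu_offload_seq ("conditioner->model->vae").
pipeline.enable_model_cpu_offload()

# Optionally swap in the turbo VAE and turn on the FlashVDM decoder.
pipeline.enable_flashvdm(enabled=True, topk_mode='mean', mc_algo='dmc')

mesh = pipeline(image=Image.open('assets/demo.png'))[0]  # illustrative input path
mesh.export('demo.glb')
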