# Uploaded via huggingface_hub by jaxmetaverse (commit 82ea528, verified).
import os
import torch
import math
import comfy.model_management as mm
from comfy.utils import ProgressBar
import folder_paths
from .depthcrafter.unet import DiffusersUNetSpatioTemporalConditionModelDepthCrafter
from .depthcrafter.depth_crafter_ppl import DepthCrafterPipeline
class DepthCrafterNode:
    """Shared base for DepthCrafter nodes: thin wrapper around ComfyUI's ProgressBar.

    Subclasses call start_progress() before long work, update_progress() per
    step, and end_progress() when done.
    """

    def __init__(self):
        # No active bar until start_progress() creates one.
        self.progress_bar = None

    def start_progress(self, total_steps, desc="Processing"):
        """Begin a new progress bar spanning total_steps (desc is currently unused)."""
        self.progress_bar = ProgressBar(total_steps)

    def update_progress(self, *args, **kwargs):
        """Advance the bar by one step; pipeline callbacks may pass extra args, all ignored."""
        bar = self.progress_bar
        if bar:
            bar.update(1)

    def end_progress(self):
        """Drop the bar reference so further update_progress() calls are no-ops."""
        self.progress_bar = None

    CATEGORY = "DepthCrafter"
class DownloadAndLoadDepthCrafterModel(DepthCrafterNode):
    """Downloads (on first use) and loads the DepthCrafter pipeline.

    Fetches the DepthCrafter UNet and the SVD base pipeline from the HF Hub
    into ComfyUI's models directory, then assembles a DepthCrafterPipeline.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "enable_model_cpu_offload": ("BOOLEAN", {"default": True}),
            "enable_sequential_cpu_offload": ("BOOLEAN", {"default": False}),
        }}

    RETURN_TYPES = ("DEPTHCRAFTER_MODEL",)
    RETURN_NAMES = ("depthcrafter_model",)
    FUNCTION = "load_model"
    DESCRIPTION = """
Downloads and loads the DepthCrafter model.
- enable_model_cpu_offload: If True, the model will be offloaded to the CPU. (Saves VRAM)
- enable_sequential_cpu_offload: If True, the model will be offloaded to the CPU in a sequential manner. (Saves the most VRAM but runs slowly)
Only enable one of the two at a time.
"""

    def _download_files(self, repo_id, revision, filenames, local_dir):
        """Fetch each listed file from the HF Hub into local_dir, ticking the progress bar."""
        from huggingface_hub import hf_hub_download
        for path in filenames:
            hf_hub_download(
                repo_id=repo_id,
                filename=path,
                local_dir=local_dir,
                # Deprecated/no-op on recent huggingface_hub; kept for older versions.
                local_dir_use_symlinks=False,
                revision=revision,
            )
            self.update_progress()

    def load_model(self, enable_model_cpu_offload, enable_sequential_cpu_offload):
        """Ensure model files exist locally, then build and place the pipeline.

        Args:
            enable_model_cpu_offload: offload whole sub-models to CPU between uses (saves VRAM).
            enable_sequential_cpu_offload: offload layer-by-layer (least VRAM, slowest).
        Returns:
            1-tuple of a dict with keys "pipe" (DepthCrafterPipeline) and "device".
        """
        device = mm.get_torch_device()
        model_dir = os.path.join(folder_paths.models_dir, "depthcrafter")
        os.makedirs(model_dir, exist_ok=True)

        # Local target directories for the two repos.
        unet_path = os.path.join(model_dir, "tencent_DepthCrafter")
        pretrain_path = os.path.join(model_dir, "stabilityai_stable-video-diffusion-img2vid-xt")

        depthcrafter_files_to_download = [
            "config.json",
            "diffusion_pytorch_model.safetensors",
        ]
        svd_files_to_download = [
            "feature_extractor/preprocessor_config.json",
            "image_encoder/config.json",
            "image_encoder/model.fp16.safetensors",
            "scheduler/scheduler_config.json",
            "unet/config.json",
            "unet/diffusion_pytorch_model.fp16.safetensors",
            "vae/config.json",
            "vae/diffusion_pytorch_model.fp16.safetensors",
            "model_index.json",
        ]
        self.start_progress(len(svd_files_to_download) + len(depthcrafter_files_to_download))

        # NOTE(review): a partially-downloaded directory still passes this
        # existence check; delete the directory to force a clean re-download.
        if not os.path.exists(unet_path):
            print(f"Downloading UNet model to: {unet_path}")
            self._download_files(
                repo_id="tencent/DepthCrafter",
                # Pinned revision keeps downloads reproducible.
                revision="c1a22b53f8abf80cd0b025adf29e637773229eca",
                filenames=depthcrafter_files_to_download,
                local_dir=unet_path,
            )
        if not os.path.exists(pretrain_path):
            print(f"Downloading pre-trained pipeline to: {pretrain_path}")
            self._download_files(
                repo_id="stabilityai/stable-video-diffusion-img2vid-xt",
                revision="9e43909513c6714f1bc78bcb44d96e733cd242aa",
                filenames=svd_files_to_download,
                local_dir=pretrain_path,
            )

        # Custom spatio-temporal UNet fine-tuned for depth estimation.
        unet = DiffusersUNetSpatioTemporalConditionModelDepthCrafter.from_pretrained(
            unet_path,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
        # BUG FIX: the diffusers kwarg is `local_files_only`; the previous
        # `use_local_files_only` is not a recognized argument and was ignored,
        # so loading could still reach out to the Hub.
        pipe = DepthCrafterPipeline.from_pretrained(
            pretrain_path,
            unet=unet,
            torch_dtype=torch.float16,
            variant="fp16",
            local_files_only=True,
            low_cpu_mem_usage=True,
        )

        # Prefer xformers attention; fall back to attention slicing if unavailable.
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception as e:
            print(e)
            print("Xformers is not enabled")
            pipe.enable_attention_slicing()

        # Placement strategy: model-level offload wins when both flags are set.
        if enable_model_cpu_offload:
            pipe.enable_model_cpu_offload()
        elif enable_sequential_cpu_offload:
            pipe.enable_sequential_cpu_offload()
        else:
            pipe.to(device)

        depthcrafter_model = {
            "pipe": pipe,
            "device": device,
        }
        self.end_progress()
        return (depthcrafter_model,)
class DepthCrafter(DepthCrafterNode):
    """Runs the DepthCrafter pipeline over an image batch, producing depth maps."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "depthcrafter_model": ("DEPTHCRAFTER_MODEL", ),
            "images": ("IMAGE", ),
            "max_res": ("INT", {"default": 1024, "min": 0, "max": 4096, "step": 64}),
            "num_inference_steps": ("INT", {"default": 5, "min": 1, "max": 100}),
            "guidance_scale": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
            "window_size": ("INT", {"default": 110, "min": 1, "max": 200}),
            "overlap": ("INT", {"default": 25, "min": 0, "max": 100}),
        }}

    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES = ("depth_maps",)
    FUNCTION = "process"
    DESCRIPTION = """
Runs the DepthCrafter model on the input images.
"""

    def process(self, depthcrafter_model, images, max_res, num_inference_steps, guidance_scale, window_size, overlap):
        """Run DepthCrafter on a [B, H, W, C] batch; return ([B, h, w, 3],) depth maps in [0, 1].

        Args:
            depthcrafter_model: dict from DownloadAndLoadDepthCrafterModel ("pipe", "device").
            images: ComfyUI IMAGE tensor, values assumed in [0, 1].
            max_res: cap on the longer spatial side before inference.
            num_inference_steps / guidance_scale: diffusion sampling parameters.
            window_size / overlap: sliding temporal window over the frame batch.
        """
        device = depthcrafter_model['device']
        pipe = depthcrafter_model['pipe']
        B, H, W, C = images.shape

        # The UNet/VAE require spatial dims that are multiples of 64.
        width = round(W / 64) * 64
        height = round(H / 64) * 64
        # Downscale when the longer (rounded) side exceeds max_res.
        max_dim = max(height, width)
        if max_dim > max_res:
            scale_factor = max_res / max_dim
            height = round(H * scale_factor / 64) * 64
            width = round(W * scale_factor / 64) * 64
        # ROBUSTNESS: rounding can reach 0 for tiny inputs; clamp to one tile.
        height = max(height, 64)
        width = max(width, 64)
        # BUG FIX: previously images were only interpolated inside the max_res
        # branch, so inputs whose sides were not multiples of 64 reached the
        # pipeline at their original size while height/width claimed otherwise.
        if (height, width) != (H, W):
            images = torch.nn.functional.interpolate(
                images.permute(0, 3, 1, 2),
                size=(height, width),
                mode='bilinear',
                align_corners=False,
            ).permute(0, 2, 3, 1)

        # Pipeline expects [t, c, h, w] fp16 frames in [0, 1] on the target device.
        images = images.permute(0, 3, 1, 2)
        images = images.to(device=device, dtype=torch.float16)
        images = torch.clamp(images, 0, 1)

        # Total progress steps = denoising steps per window, summed over windows.
        # BUG FIX: guard the stride against zero/negative values (overlap can
        # equal or exceed window_size per INPUT_TYPES), and never report fewer
        # than one window when B <= window_size.
        stride = max(window_size - overlap, 1)
        num_windows = 1 if B <= window_size else math.ceil((B - window_size) / stride) + 1
        self.start_progress(num_inference_steps * num_windows)

        with torch.inference_mode():
            result = pipe(
                images,
                height=height,
                width=width,
                output_type="pt",
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                window_size=window_size,
                overlap=overlap,
                track_time=False,
                progress_callback=self.update_progress,
            )
        # frames[0] is [t, c, h, w] (the old [B, H, W, C] comment contradicted
        # the dim=1 channel reduction below); channel mean gives grayscale depth.
        res = result.frames[0]
        res = res.mean(dim=1)  # [t, h, w]; identical to sum(dim=1) / C
        # Normalize to [0, 1]; epsilon avoids division by zero on constant maps.
        res_min = res.min()
        res_max = res.max()
        res = (res - res_min) / (res_max - res_min + 1e-8)
        # ComfyUI IMAGE tensors are [t, h, w, 3] float32.
        depth_maps = res.unsqueeze(-1).repeat(1, 1, 1, 3)
        depth_maps = depth_maps.float()
        self.end_progress()
        return (depth_maps,)