|
from collections.abc import Iterable
from typing import Callable, Union

import torch
import torch.fft as fft
from torch import Tensor

from einops import rearrange

import comfy.model_management
import comfy.sample
import comfy.samplers
import comfy.utils
from comfy.model_base import BaseModel
from comfy.model_patcher import ModelPatcher
from comfy.sd import VAE

from . import freeinit
from .conditioning import LoraHookMode
from .context import ContextOptions, ContextOptionsGroup
from .logger import logger
from .utils_model import SigmaSchedule
from .utils_motion import extend_to_batch_size, get_sorted_list_via_attr, prepare_mask_batch
|
|
|
|
|
def prepare_mask_ad(noise_mask, shape, device):
    """Ensure a noise mask matches the latent shape `shape` on `device`.

    Resizes the mask spatially to (shape[2], shape[3]) with bilinear
    interpolation, repeats it across the channel dim to shape[1], repeats it
    along the batch dim to shape[0], and moves it to the target device.
    """
    # collapse to (N, 1, H, W) so interpolate treats it as a single-channel image batch
    noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear")

    noise_mask = torch.cat([noise_mask] * shape[1], dim=1)
    # NOTE(review): comfy.utils is not imported explicitly at the top of this file;
    # this works only because another comfy import pulls it in — an explicit
    # `import comfy.utils` would be safer.
    noise_mask = comfy.utils.repeat_to_batch_size(noise_mask, shape[0])
    noise_mask = noise_mask.to(device)
    return noise_mask
|
|
|
|
|
class NoiseLayerType:
    """String identifiers for the supported kinds of generated noise."""

    DEFAULT = "default"
    CONSTANT = "constant"
    EMPTY = "empty"
    REPEATED_CONTEXT = "repeated_context"
    FREENOISE = "FreeNoise"

    # selectable options, in display order
    LIST = [DEFAULT, CONSTANT, EMPTY, REPEATED_CONTEXT, FREENOISE]
|
|
|
|
|
class NoiseApplication:
    """How a noise layer combines its noise with the existing noise."""

    ADD = "add"
    ADD_WEIGHTED = "add_weighted"
    REPLACE = "replace"

    # selectable options, in display order
    LIST = [ADD, ADD_WEIGHTED, REPLACE]
|
|
|
|
|
class NoiseNormalize:
    """Whether generated noise should be renormalized after combination."""

    DISABLE = "disable"
    NORMAL = "normal"

    # selectable options, in display order
    LIST = [DISABLE, NORMAL]
|
|
|
|
|
class SampleSettings:
    """Bundle of all per-run sampling customizations.

    Collects seed handling, base noise type, extra noise layers, iteration
    options (e.g. FreeInit), custom CFG keyframes, sigma schedule override,
    noised-image injection, and noise calibration into one object that the
    sampling pipeline consults.
    """

    def __init__(self, batch_offset: int=0, noise_type: str=None, seed_gen: str=None, seed_offset: int=0, noise_layers: 'NoiseLayerGroup'=None,
                 iteration_opts=None, seed_override:int=None, negative_cond_flipflop=False, adapt_denoise_steps: bool=False,
                 custom_cfg: 'CustomCFGKeyframeGroup'=None, sigma_schedule: SigmaSchedule=None, image_injection: 'NoisedImageToInjectGroup'=None,
                 noise_calibration: 'NoiseCalibration'=None):
        self.batch_offset = batch_offset
        self.noise_type = noise_type if noise_type is not None else NoiseLayerType.DEFAULT
        self.seed_gen = seed_gen if seed_gen is not None else SeedNoiseGeneration.COMFY
        self.noise_layers = noise_layers if noise_layers else NoiseLayerGroup()
        self.iteration_opts = iteration_opts if iteration_opts else IterationOptions()
        self.seed_offset = seed_offset
        self.seed_override = seed_override
        self.negative_cond_flipflop = negative_cond_flipflop
        self.adapt_denoise_steps = adapt_denoise_steps
        # cloned so mutable per-run keyframe state is not shared between settings objects
        self.custom_cfg = custom_cfg.clone() if custom_cfg else custom_cfg
        self.sigma_schedule = sigma_schedule
        self.image_injection = image_injection.clone() if image_injection else NoisedImageToInjectGroup()
        self.noise_calibration = noise_calibration

    def prepare_noise(self, seed: int, latents: Tensor, noise: Tensor, extra_seed_offset=0, extra_args:dict={}, force_create_noise=True):
        """Create (or keep) base noise for `latents`, then apply all noise layers.

        `seed` may be a single int or an iterable of per-frame seeds; an
        iterable triggers per-frame noise generation, and the first seed
        (plus seed_offset) becomes the reference seed for noise layers.
        Returns the final noise tensor.
        """
        if self.seed_override is not None:
            seed = self.seed_override

        # individual seeds for each latent in the batch
        if isinstance(seed, Iterable):
            noise = SeedNoiseGeneration.create_noise_individual_seeds(seeds=seed, latents=latents, seed_offset=self.seed_offset+extra_seed_offset, extra_args=extra_args)
            seed = seed[0]+self.seed_offset
        else:
            seed += self.seed_offset

        # only regenerate base noise when some customization is active (or when forced)
        if self.batch_offset != 0 or self.seed_offset != 0 or self.noise_type != NoiseLayerType.DEFAULT or self.seed_gen != SeedNoiseGeneration.COMFY or force_create_noise:
            noise = SeedNoiseGeneration.create_noise(seed=seed+extra_seed_offset, latents=latents, existing_seed_gen=self.seed_gen, seed_gen=self.seed_gen,
                                                     noise_type=self.noise_type, batch_offset=self.batch_offset, extra_args=extra_args)

        # layers are applied in order; each one blends its own noise into the result
        for noise_layer in self.noise_layers.layers:
            layer_noise = noise_layer.create_layer_noise(existing_seed_gen=self.seed_gen, seed=seed, latents=latents,
                                                         extra_seed_offset=extra_seed_offset, extra_args=extra_args)
            noise = noise_layer.apply_layer_noise(new_noise=layer_noise, old_noise=noise)

        return noise

    def pre_run(self, model: ModelPatcher):
        """Reset per-run state before sampling begins."""
        if self.custom_cfg is not None:
            self.custom_cfg.reset()
        if self.image_injection is not None:
            self.image_injection.reset()

    def cleanup(self):
        """Reset per-run state after sampling ends (mirror of pre_run)."""
        if self.custom_cfg is not None:
            self.custom_cfg.reset()
        if self.image_injection is not None:
            self.image_injection.reset()

    def clone(self):
        """Return a copy; the constructor re-clones custom_cfg and image_injection,
        while noise_layers is cloned here and other members are shared."""
        return SampleSettings(batch_offset=self.batch_offset, noise_type=self.noise_type, seed_gen=self.seed_gen, seed_offset=self.seed_offset,
                              noise_layers=self.noise_layers.clone(), iteration_opts=self.iteration_opts, seed_override=self.seed_override,
                              negative_cond_flipflop=self.negative_cond_flipflop, adapt_denoise_steps=self.adapt_denoise_steps, custom_cfg=self.custom_cfg,
                              sigma_schedule=self.sigma_schedule, image_injection=self.image_injection, noise_calibration=self.noise_calibration)
|
|
|
|
|
class NoiseLayer:
    """Base class for an extra noise layer; subclasses define how the layer's
    noise is blended into the existing noise via apply_layer_noise."""

    def __init__(self, noise_type: str, batch_offset: int, seed_gen_override: str, seed_offset: int, seed_override: int=None, mask: Tensor=None):
        self.application: str = NoiseApplication.REPLACE
        self.noise_type = noise_type
        self.batch_offset = batch_offset
        self.seed_gen_override = seed_gen_override
        self.seed_offset = seed_offset
        self.seed_override = seed_override
        self.mask = mask

    def create_layer_noise(self, existing_seed_gen: str, seed: int, latents: Tensor, extra_seed_offset=0, extra_args:dict={}) -> Tensor:
        """Generate this layer's noise for `latents`, honoring seed overrides."""
        if self.seed_override is not None:
            seed = self.seed_override
        # per-frame seeds: generate each latent's noise from its own seed
        if isinstance(seed, Iterable):
            return SeedNoiseGeneration.create_noise_individual_seeds(seeds=seed, latents=latents, seed_offset=self.seed_offset+extra_seed_offset, extra_args=extra_args)
        seed += self.seed_offset + extra_seed_offset
        return SeedNoiseGeneration.create_noise(seed=seed, latents=latents, existing_seed_gen=existing_seed_gen, seed_gen=self.seed_gen_override,
                                                noise_type=self.noise_type, batch_offset=self.batch_offset, extra_args=extra_args)

    def apply_layer_noise(self, new_noise: Tensor, old_noise: Tensor) -> Tensor:
        """Base implementation is a no-op; subclasses blend new into old."""
        return old_noise

    def get_noise_mask(self, noise: Tensor) -> Tensor:
        """Return a mask shaped like `noise`, or the scalar 1 when no mask is set."""
        if self.mask is None:
            return 1
        reshaped = self.mask.reshape((-1, 1, self.mask.shape[-2], self.mask.shape[-1]))
        return prepare_mask_ad(reshaped, noise.shape, noise.device)
|
|
|
|
|
class NoiseLayerReplace(NoiseLayer):
    """Noise layer that substitutes its own noise wherever the mask applies."""

    def __init__(self, noise_type: str, batch_offset: int, seed_gen_override: str, seed_offset: int, seed_override: int=None, mask: Tensor=None):
        super().__init__(noise_type, batch_offset, seed_gen_override, seed_offset, seed_override, mask)
        self.application = NoiseApplication.REPLACE

    def apply_layer_noise(self, new_noise: Tensor, old_noise: Tensor) -> Tensor:
        """Keep old noise outside the mask; use this layer's noise inside it."""
        mask = self.get_noise_mask(old_noise)
        return (1-mask)*old_noise + mask*new_noise
|
|
|
|
|
class NoiseLayerAdd(NoiseLayer):
    """Noise layer that adds weighted noise on top of the existing noise."""

    def __init__(self, noise_type: str, batch_offset: int, seed_gen_override: str, seed_offset: int, seed_override: int=None, mask: Tensor=None,
                 noise_weight=1.0):
        super().__init__(noise_type, batch_offset, seed_gen_override, seed_offset, seed_override, mask)
        self.noise_weight = noise_weight
        self.application = NoiseApplication.ADD

    def apply_layer_noise(self, new_noise: Tensor, old_noise: Tensor) -> Tensor:
        """Outside the mask keep old noise; inside, add the weighted new noise."""
        mask = self.get_noise_mask(old_noise)
        added = old_noise + new_noise * self.noise_weight
        return (1-mask)*old_noise + mask*added
|
|
|
|
|
class NoiseLayerAddWeighted(NoiseLayerAdd):
    """Like NoiseLayerAdd, but attenuates the old noise by
    noise_weight*balance_multiplier to rebalance total noise magnitude."""

    def __init__(self, noise_type: str, batch_offset: int, seed_gen_override: str, seed_offset: int, seed_override: int=None, mask: Tensor=None,
                 noise_weight=1.0, balance_multiplier=1.0):
        super().__init__(noise_type, batch_offset, seed_gen_override, seed_offset, seed_override, mask, noise_weight)
        self.balance_multiplier = balance_multiplier
        self.application = NoiseApplication.ADD_WEIGHTED

    def apply_layer_noise(self, new_noise: Tensor, old_noise: Tensor) -> Tensor:
        """Weighted blend inside the mask; untouched old noise outside it."""
        mask = self.get_noise_mask(old_noise)
        mixed = old_noise * (1.0-(self.noise_weight*self.balance_multiplier)) + new_noise * self.noise_weight
        return (1-mask)*old_noise + mask*mixed
|
|
|
|
|
class NoiseLayerGroup:
    """Ordered collection of NoiseLayer objects applied in sequence."""

    def __init__(self):
        self.layers: list[NoiseLayer] = []

    def add(self, layer: NoiseLayer) -> None:
        """Append a layer at the end of the group."""
        self.layers.append(layer)

    def add_to_start(self, layer: NoiseLayer) -> None:
        """Insert a layer at the front of the group."""
        self.layers.insert(0, layer)

    def __getitem__(self, index) -> NoiseLayer:
        return self.layers[index]

    def is_empty(self) -> bool:
        """True when the group contains no layers."""
        return not self.layers

    def clone(self) -> 'NoiseLayerGroup':
        """Shallow copy: the new group shares the layer objects themselves."""
        cloned = NoiseLayerGroup()
        cloned.layers = list(self.layers)
        return cloned
|
|
|
|
|
class RandDevice:
    """Identifiers for where random-number generation should run."""

    CPU = "cpu"
    GPU = "gpu"
    NV = "nv"
|
|
|
|
|
def get_generator(device=RandDevice.CPU, seed: int=None):
    """Create a torch.Generator for the requested RandDevice.

    Args:
        device: RandDevice.CPU or RandDevice.GPU. RandDevice.NV is declared on
            RandDevice but has no implementation here and will raise.
        seed: optional seed; when given, the generator is manually seeded.

    Returns:
        (generator, raw_device): the generator and the torch device string/object
        that tensors created with it should target.

    Raises:
        ValueError: if device is not a recognized value. (Was a bare Exception;
        ValueError matches the error style used elsewhere in this module and is
        still caught by any caller catching Exception.)
    """
    if device == RandDevice.CPU:
        raw_device = "cpu"
    elif device == RandDevice.GPU:
        raw_device = comfy.model_management.get_torch_device()
    else:
        raise ValueError(f"Unknown noise generator device: '{device}'")
    generator = torch.Generator(raw_device)
    if seed is not None:
        generator = generator.manual_seed(seed)
    return generator, raw_device
|
|
|
|
|
class SeedNoiseGeneration:
    """Strategies for generating the initial sampling noise from seeds.

    Two families exist:
    - comfy: one large randn call covering the whole batch (noise for frame i
      depends on the batch as a whole);
    - auto1111: one randn call per frame with seed+index (each frame's noise
      depends only on its own seed).
    Each family can run its RNG on CPU or GPU (see _SOURCE_DICT).
    """

    COMFY = "comfy"
    COMFYGPU = "comfy [gpu]"

    AUTO1111 = "auto1111"
    AUTO1111GPU = "auto1111 [gpu]"

    # sentinel meaning "defer to the existing_seed_gen argument"
    USE_EXISTING = "use existing"

    LIST = [COMFY, COMFYGPU, AUTO1111, AUTO1111GPU]
    LIST_WITH_OVERRIDE = [USE_EXISTING, COMFY, COMFYGPU, AUTO1111, AUTO1111GPU]

    _COMFY_GENS = [COMFY, COMFYGPU]
    _AUTO1111_GENS = [AUTO1111, AUTO1111GPU]

    # maps each seed_gen choice to the device its RNG runs on
    _SOURCE_DICT = {
        COMFY: RandDevice.CPU, COMFYGPU: RandDevice.GPU,
        AUTO1111: RandDevice.CPU, AUTO1111GPU: RandDevice.GPU,
    }

    @classmethod
    def get_device(cls, seed_gen: str):
        """Return the RandDevice associated with a seed_gen choice."""
        return cls._SOURCE_DICT[seed_gen]

    @classmethod
    def create_noise(cls, seed: int, latents: Tensor, existing_seed_gen: str=COMFY, seed_gen: str=USE_EXISTING, noise_type: str=NoiseLayerType.DEFAULT, batch_offset: int=0, extra_args: dict={}):
        """Dispatch noise creation to the comfy or auto1111 family.

        Raises ValueError for an unrecognized seed_gen.
        """
        # resolve USE_EXISTING to the caller-provided existing generation style
        if seed_gen == cls.USE_EXISTING:
            seed_gen = existing_seed_gen
        if seed_gen in cls._COMFY_GENS:
            return cls.create_noise_comfy(seed, latents, noise_type, batch_offset, extra_args, cls.get_device(seed_gen))
        elif seed_gen in cls._AUTO1111_GENS:
            return cls.create_noise_auto1111(seed, latents, noise_type, batch_offset, extra_args, cls.get_device(seed_gen))
        raise ValueError(f"Noise seed_gen {seed_gen} is not recognized.")

    @staticmethod
    def create_noise_comfy(seed: int, latents: Tensor, noise_type: str=NoiseLayerType.DEFAULT, batch_offset: int=0, extra_args: dict={}, device=RandDevice.CPU):
        """Comfy-style: a single randn call for the whole (offset) batch.

        batch_offset is implemented by generating `batch_offset` extra leading
        frames of noise and discarding them, shifting which noise each frame gets.
        """
        common_noise = SeedNoiseGeneration._create_common_noise(seed, latents, noise_type, batch_offset, extra_args, device)
        if common_noise is not None:
            return common_noise
        if noise_type == NoiseLayerType.CONSTANT:
            generator, raw_device = get_generator(device, seed)
            length = latents.shape[0]
            # generate batch_offset+1 frames, keep the last, repeat it for every frame
            single_shape = (1 + batch_offset, latents.shape[1], latents.shape[2], latents.shape[3])
            single_noise = torch.randn(single_shape, dtype=latents.dtype, layout=latents.layout, generator=generator, device=raw_device).to(device="cpu")
            return torch.cat([single_noise[batch_offset:]] * length, dim=0)

        # generate the full batch (plus discarded offset frames) in one call
        generator, raw_device = get_generator(device, seed)
        offset_shape = (latents.shape[0] + batch_offset, latents.shape[1], latents.shape[2], latents.shape[3])
        final_noise = torch.randn(offset_shape, dtype=latents.dtype, layout=latents.layout, generator=generator, device=raw_device).to(device="cpu")
        final_noise = final_noise[batch_offset:]

        # derivative noise types (repeated_context, FreeNoise) post-process the result
        derivative_noise = SeedNoiseGeneration._create_derivative_noise(final_noise, noise_type=noise_type, seed=seed, extra_args=extra_args, device=device)
        if derivative_noise is not None:
            return derivative_noise
        return final_noise

    @staticmethod
    def create_noise_auto1111(seed: int, latents: Tensor, noise_type: str=NoiseLayerType.DEFAULT, batch_offset: int=0, extra_args: dict={}, device=RandDevice.CPU):
        """Auto1111-style: one randn call per frame, seeded with seed+index.

        batch_offset simply shifts every per-frame seed.
        """
        common_noise = SeedNoiseGeneration._create_common_noise(seed, latents, noise_type, batch_offset, extra_args, device)
        if common_noise is not None:
            return common_noise
        if noise_type == NoiseLayerType.CONSTANT:
            generator, raw_device = get_generator(device, seed+batch_offset)
            length = latents.shape[0]
            single_shape = (1, latents.shape[1], latents.shape[2], latents.shape[3])
            single_noise = torch.randn(single_shape, dtype=latents.dtype, layout=latents.layout, generator=generator, device=raw_device).to(device="cpu")
            return torch.cat([single_noise] * length, dim=0)

        # one seeded randn per frame, concatenated along batch
        length = latents.shape[0]
        single_shape = (1, latents.shape[1], latents.shape[2], latents.shape[3])
        all_noises = []

        for i in range(length):
            generator, raw_device = get_generator(device, seed+i+batch_offset)
            all_noises.append(torch.randn(single_shape, dtype=latents.dtype, layout=latents.layout, generator=generator, device=raw_device).to(device="cpu"))
        final_noise = torch.cat(all_noises, dim=0)

        derivative_noise = SeedNoiseGeneration._create_derivative_noise(final_noise, noise_type=noise_type, seed=seed, extra_args=extra_args, device=device)
        if derivative_noise is not None:
            return derivative_noise
        return final_noise

    @staticmethod
    def create_noise_individual_seeds(seeds: list[int], latents: Tensor, seed_offset: int=0, extra_args: dict={}, device=RandDevice.CPU):
        """Generate noise where each frame uses its own explicit seed (+seed_offset).

        Raises ValueError when fewer seeds than frames are provided; extra seeds
        are ignored.
        """
        length = latents.shape[0]
        if len(seeds) < length:
            raise ValueError(f"{len(seeds)} seeds in seed_override were provided, but at least {length} are required to work with the current latents.")
        seeds = seeds[:length]
        single_shape = (1, latents.shape[1], latents.shape[2], latents.shape[3])
        all_noises = []
        for seed in seeds:
            generator, raw_device = get_generator(device, seed+seed_offset)
            all_noises.append(torch.randn(single_shape, dtype=latents.dtype, layout=latents.layout, generator=generator, device=raw_device).to(device="cpu"))
        return torch.cat(all_noises, dim=0)

    @staticmethod
    def _create_common_noise(seed: int, latents: Tensor, noise_type: str=NoiseLayerType.DEFAULT, batch_offset: int=0, extra_args: dict={}, device=RandDevice.CPU):
        """Handle noise types shared by all families; returns None when the
        caller should generate noise itself."""
        if noise_type == NoiseLayerType.EMPTY:
            return torch.zeros_like(latents)
        return None

    @staticmethod
    def _create_derivative_noise(noise: Tensor, noise_type: str, seed: int, extra_args: dict, device=RandDevice.CPU):
        """Post-process already-generated noise for derivative noise types;
        returns None when noise_type has no registered converter."""
        derivative_func = DERIVATIVE_NOISE_FUNC_MAP.get(noise_type, None)
        if derivative_func is None:
            return None
        return derivative_func(noise=noise, seed=seed, extra_args=extra_args, device=device)

    @staticmethod
    def _convert_to_repeated_context(noise: Tensor, extra_args: dict, device=RandDevice.CPU, **kwargs):
        """Tile the first context window's noise across the whole video length."""
        opts: ContextOptionsGroup = extra_args["context_options"]
        context_length: int = opts.context_length if not opts.view_options else opts.view_options.context_length
        if context_length is None:
            return noise
        length = noise.shape[0]
        noise = noise[:context_length]
        # repeat enough full windows, then trim back to the original length
        cat_count = (length // context_length) + 1
        return torch.cat([noise] * cat_count, dim=0)[:length]

    @staticmethod
    def _convert_to_freenoise(noise: Tensor, seed: int, extra_args: dict, device=RandDevice.CPU, **kwargs):
        """FreeNoise: fill each successive context window (in place) with a
        shuffled copy of earlier frames' noise so overlapping windows share
        noise content while avoiding exact repetition."""
        opts: ContextOptionsGroup = extra_args["context_options"]
        context_length: int = opts.context_length if not opts.view_options else opts.view_options.context_length
        context_overlap: int = opts.context_overlap if not opts.view_options else opts.view_options.context_overlap
        video_length: int = noise.shape[0]
        if context_length is None:
            return noise
        delta = context_length - context_overlap
        # shuffling RNG always runs on CPU for reproducibility across devices
        generator, _ = get_generator(RandDevice.CPU, seed)

        for start_idx in range(0, video_length-context_length, delta):
            # place_idx: first frame past the current window that gets new (shuffled) noise
            place_idx = start_idx + context_length
            if place_idx >= video_length:
                break
            end_idx = place_idx - 1

            # final partial window: only final_delta frames remain to fill
            if end_idx + delta >= video_length:
                final_delta = video_length - place_idx
                list_idx = torch.Tensor(list(range(start_idx,start_idx+final_delta))).to(torch.long)
                list_idx = list_idx[torch.randperm(final_delta, generator=generator)]
                noise[place_idx:place_idx+final_delta] = noise[list_idx]
                break

            # shuffle `delta` source frames from the window start into the next segment
            list_idx = torch.Tensor(list(range(start_idx,start_idx+delta))).to(torch.long)
            list_idx = list_idx[torch.randperm(delta, generator=generator)]
            noise[place_idx:place_idx+delta] = noise[list_idx]
        return noise
|
|
|
|
|
# Noise types that post-process already-generated noise, mapped to their converters
# (consulted by SeedNoiseGeneration._create_derivative_noise).
DERIVATIVE_NOISE_FUNC_MAP = {
    NoiseLayerType.REPEATED_CONTEXT: SeedNoiseGeneration._convert_to_repeated_context,
    NoiseLayerType.FREENOISE: SeedNoiseGeneration._convert_to_freenoise,
}
|
|
|
|
|
class IterationOptions:
    """Options controlling repeated sampling iterations over the same latents."""

    SAMPLER = "sampler"

    def __init__(self, iterations: int=1, cache_init_noise=False, cache_init_latents=False,
                 iter_batch_offset: int=0, iter_seed_offset: int=0):
        self.iterations = iterations
        self.cache_init_noise = cache_init_noise
        self.cache_init_latents = cache_init_latents
        self.iter_batch_offset = iter_batch_offset
        self.iter_seed_offset = iter_seed_offset
        # subclasses flip this when preprocess_latents needs the sampler object
        self.need_sampler = False

    def get_sigma(self, model: ModelPatcher, step: int):
        """Look up the sigma for a step, preferring a patched model_sampling."""
        sampling = model.model.model_sampling
        patches = model.object_patches
        if "model_sampling" in patches:
            sampling = patches["model_sampling"]
        return sampling.sigmas[step]

    def initialize(self, latents: Tensor):
        """Hook for subclasses; base implementation does nothing."""
        pass

    def preprocess_latents(self, curr_i: int, model: ModelPatcher, latents: Tensor, noise: Tensor,
                           seed: int, sample_settings: SampleSettings, noise_extra_args: dict, **kwargs):
        """Regenerate noise for iterations past the first when per-iteration
        batch/seed offsets are configured; otherwise pass latents/noise through."""
        no_offsets = self.iter_batch_offset == 0 and self.iter_seed_offset == 0
        if curr_i == 0 or no_offsets:
            return latents, noise
        adjusted = sample_settings.clone()
        adjusted.batch_offset += self.iter_batch_offset * curr_i
        adjusted.seed_offset += self.iter_seed_offset * curr_i
        fresh_noise = adjusted.prepare_noise(seed=seed, latents=latents, noise=None,
                                             extra_args=noise_extra_args, force_create_noise=True)
        return latents, fresh_noise
|
|
|
|
|
class FreeInitOptions(IterationOptions):
    """Iteration options implementing FreeInit-style latent re-initialization:
    each iteration renoises the previous result and mixes its low frequencies
    with fresh noise's high frequencies via a 3D frequency filter."""

    FREEINIT_SAMPLER = "FreeInit [sampler sigma]"
    FREEINIT_MODEL = "FreeInit [model sigma]"
    DINKINIT_V1 = "DinkInit_v1"

    # selectable init strategies
    LIST = [FREEINIT_SAMPLER, FREEINIT_MODEL, DINKINIT_V1]

    def __init__(self, iterations: int, step: int=999, apply_to_1st_iter: bool=False,
                 filter=freeinit.FreeInitFilter.GAUSSIAN, d_s=0.25, d_t=0.25, n=4, init_type=FREEINIT_SAMPLER,
                 iter_batch_offset: int=0, iter_seed_offset: int=1):
        super().__init__(iterations=iterations, cache_init_noise=True, cache_init_latents=True,
                         iter_batch_offset=iter_batch_offset, iter_seed_offset=iter_seed_offset)
        self.apply_to_1st_iter = apply_to_1st_iter
        self.step = step
        self.filter = filter
        self.d_s = d_s  # spatial cutoff for the frequency filter
        self.d_t = d_t  # temporal cutoff for the frequency filter
        self.n = n      # filter order
        self.freq_filter = None   # built lazily in initialize() from latent shape
        self.freq_filter2 = None  # NOTE(review): assigned but never used in this file
        # sampler sigmas are only needed for the FREEINIT_SAMPLER strategy
        self.need_sampler = True if init_type in [self.FREEINIT_SAMPLER] else False
        self.init_type = init_type

    def initialize(self, latents: Tensor):
        """Build the low-pass frequency filter matching the latents' shape/device."""
        self.freq_filter = freeinit.get_freq_filter(latents.shape, device=latents.device, filter_type=self.filter,
                                                    n=self.n, d_s=self.d_s, d_t=self.d_t)

    def preprocess_latents(self, curr_i: int, model: ModelPatcher, latents: Tensor, noise: Tensor, cached_latents: Tensor, cached_noise: Tensor,
                           seed:int, sample_settings: SampleSettings, noise_extra_args: dict, sampler: comfy.samplers.KSampler=None, **kwargs):
        """Return (latents, noise) for iteration curr_i, applying the chosen
        FreeInit renoise-and-frequency-mix strategy after the first iteration."""
        # by default, the first iteration samples unmodified
        if curr_i == 0 and not self.apply_to_1st_iter:
            return latents, noise

        if self.init_type in [self.FREEINIT_SAMPLER, self.FREEINIT_MODEL]:
            # pick the sigma for self.step from the sampler schedule (sampler
            # strategy) or directly from model_sampling (model strategy);
            # `999-self.step` / `self.step-1000` index from opposite ends so that
            # step=999 selects the highest-noise sigma in both cases
            if sampler is not None:
                sigma = sampler.sigmas[999-self.step].to(latents.device) / (model.model.latent_format.scale_factor)
            else:
                sigma = self.get_sigma(model, self.step-1000).to(latents.device) / (model.model.latent_format.scale_factor)
            # DDPM-style renoising of the previous iteration's latents
            alpha_cumprod = 1 / ((sigma * sigma) + 1)
            sqrt_alpha_prod = alpha_cumprod ** 0.5
            sqrt_one_minus_alpha_prod = (1 - alpha_cumprod) ** 0.5
            noised_latents = latents * sqrt_alpha_prod + noise.to(dtype=latents.dtype, device=latents.device) * sqrt_one_minus_alpha_prod

            # fresh noise for this iteration, with per-iteration seed/batch offsets
            temp_sample_settings = sample_settings.clone()
            temp_sample_settings.batch_offset += self.iter_batch_offset * curr_i
            temp_sample_settings.seed_offset += self.iter_seed_offset * curr_i
            z_rand = temp_sample_settings.prepare_noise(seed=seed, latents=latents, noise=None,
                                                        extra_args=noise_extra_args, force_create_noise=True)

            # keep low frequencies of renoised latents, take high freqs from z_rand
            noised_latents = freeinit.freq_mix_3d(x=noised_latents, noise=z_rand, LPF=self.freq_filter)
            return cached_latents, noised_latents
        elif self.init_type == self.DINKINIT_V1:
            # legacy variant: renoise with the cached initial noise and scale by
            # alpha_cumprod instead of the DDPM sqrt factors
            sigma = self.get_sigma(model, self.step-1000).to(latents.device)
            alpha_cumprod = 1 / ((sigma * sigma) + 1)
            noised_latents = (latents + (cached_noise.to(dtype=latents.dtype, device=latents.device) * sigma)) * alpha_cumprod

            temp_sample_settings = sample_settings.clone()
            temp_sample_settings.batch_offset += self.iter_batch_offset * curr_i
            temp_sample_settings.seed_offset += self.iter_seed_offset * curr_i
            z_rand = temp_sample_settings.prepare_noise(seed=seed, latents=latents, noise=None,
                                                        extra_args=noise_extra_args, force_create_noise=True)

            noised_latents = freeinit.freq_mix_3d(x=noised_latents, noise=z_rand, LPF=self.freq_filter)
            return cached_latents, noised_latents
        else:
            raise ValueError(f"FreeInit init_type '{self.init_type}' is not recognized.")
|
|
|
|
|
class NoiseCalibration:
    """Iteratively refines initial noise by comparing high-frequency content of
    a one-step denoised prediction with the current latents (noise calibration)."""

    def __init__(self, scale: float=0.5, calib_iterations: int=1):
        self.scale = scale  # fraction of the centered spectrum treated as "low" frequency
        self.calib_iterations = calib_iterations

    def perform_calibration(self, sample_func: Callable, model: ModelPatcher, latents: Tensor, noise: Tensor, is_custom: bool, args: list, kwargs: dict):
        """Dispatch to the custom-sampler or KSampler implementation; returns
        (latents, noise) with noise possibly recalibrated."""
        if is_custom:
            return self._perform_calibration_custom(sample_func=sample_func, model=model, latents=latents, noise=noise, _args=args, _kwargs=kwargs)
        return self._perform_calibration_not_custom(sample_func=sample_func, model=model, latents=latents, noise=noise, args=args, kwargs=kwargs)

    def _perform_calibration_custom(self, sample_func: Callable, model: ModelPatcher, latents: Tensor, noise: Tensor, _args: list, _kwargs: dict):
        """Calibration for custom samplers: run a single denoising step per
        iteration and nudge the noise toward matching high-frequency content."""
        # shallow copies so the caller's arg list/kwargs are not mutated
        args = _args.copy()
        kwargs = _kwargs.copy()

        # args[2] is assumed to be the sigmas tensor for the custom sampler;
        # truncate to the first two so sample_func performs exactly one step
        sigmas = args[2]
        sigmas = sigmas[:2]
        args[2] = sigmas

        # DDPM-style factors derived from the (descaled) starting sigma
        sigma = sigmas[0] / (model.model.latent_format.scale_factor)
        alpha_cumprod = 1 / ((sigma * sigma) + 1)
        sqrt_alpha_prod = alpha_cumprod ** 0.5
        sqrt_one_minus_alpha_prod = (1 - alpha_cumprod) ** 0.5
        zero_noise = torch.zeros_like(noise)
        new_latents = latents

        for _ in range(self.calib_iterations):
            # renoise latents with the current noise estimate
            x = new_latents * sqrt_alpha_prod + noise * sqrt_one_minus_alpha_prod

            # args[-1] is assumed to be the latent input slot of sample_func;
            # zero_noise is passed so no extra noise is added inside
            args[-1] = x
            e_t_theta = sample_func(model, zero_noise, *args, **kwargs) * (model.model.latent_format.scale_factor)
            # predicted clean latents from the one-step epsilon prediction
            x_0_t = (x - sqrt_one_minus_alpha_prod * e_t_theta) / sqrt_alpha_prod
            # difference of high-frequency content between prediction and latents
            freq_delta = (self.get_low_or_high_fft(x_0_t, self.scale, is_low=False) - self.get_low_or_high_fft(new_latents, self.scale, is_low=False))
            # fold the frequency correction back into the noise estimate
            noise = e_t_theta + sqrt_alpha_prod / sqrt_one_minus_alpha_prod * freq_delta

        return latents, noise

    def _perform_calibration_not_custom(self, sample_func: Callable, model: ModelPatcher, latents: Tensor, noise: Tensor, args: list, kwargs: dict):
        """KSampler path: calibration not implemented; pass-through."""
        return latents, noise

    @staticmethod
    def get_low_or_high_fft(x: Tensor, scale: float, is_low=True):
        """Keep only the low (is_low=True) or high frequency components of x.

        Works on a centered 2D FFT over the last two dims; `scale` sets the
        half-width of the central (low-frequency) box relative to the image.
        """
        # move batch dim inward so the mask broadcasts over (C, T, H, W)
        x = rearrange(x, "b c h w -> c b h w")

        # centered spectrum over spatial dims
        x_freq = fft.fftn(x, dim=(-2, -1))
        x_freq = fft.fftshift(x_freq, dim=(-2, -1))
        C, T, H, W = x_freq.shape

        # central box = low frequencies; keep it (low-pass) or zero it (high-pass)
        if is_low:
            mask = torch.zeros((C, T, H, W), device=x.device)
            crow, ccol = H // 2, W // 2
            mask[..., crow - int(crow * scale):crow + int(crow * scale), ccol - int(ccol * scale):ccol + int(ccol * scale)] = 1
        else:
            mask = torch.ones((C, T, H, W), device=x.device)
            crow, ccol = H // 2, W //2
            mask[..., crow - int(crow * scale):crow + int(crow * scale), ccol - int(ccol * scale):ccol + int(ccol * scale)] = 0
        x_freq = x_freq * mask

        # back to the spatial domain and the original layout
        x_freq = fft.ifftshift(x_freq, dim=(-2, -1))
        x_filtered = fft.ifftn(x_freq, dim=(-2, -1)).real

        x_filtered = rearrange(x_filtered, "c b h w -> b c h w")
        return x_filtered
|
|
|
|
|
class CFGExtras:
    """Wraps a callable that transforms sampler model_options for custom CFG."""

    def __init__(self, call_fn: Callable):
        self.call_fn = call_fn
|
|
|
|
|
class CFGExtrasGroup:
    """Ordered collection of CFGExtras callables."""

    def __init__(self):
        self.extras: list[CFGExtras] = []

    def add(self, extra: CFGExtras):
        """Append an extra to the end of the group."""
        self.extras.append(extra)

    def is_empty(self) -> bool:
        """True when the group holds no extras."""
        return not self.extras

    def clone(self):
        """Shallow copy: the new group shares the CFGExtras objects."""
        cloned = CFGExtrasGroup()
        cloned.extras = self.extras.copy()
        return cloned
|
|
|
|
|
class CustomCFGKeyframe:
    """A cfg value (scalar or per-pixel Tensor) plus optional CFG extras that
    becomes active at a given percentage of the sampling schedule."""

    def __init__(self, cfg_multival: Union[float, Tensor], start_percent=0.0, guarantee_steps=1, cfg_extras: CFGExtrasGroup=None):
        self.cfg_multival = cfg_multival
        self.cfg_extras = cfg_extras

        # scheduling: start_t is filled in by initialize_timesteps from start_percent
        self.start_percent = float(start_percent)
        self.start_t = 999999999.9
        self.guarantee_steps = guarantee_steps

    def clone(self):
        """Return a copy of this keyframe.

        BUGFIX: cfg_extras was previously dropped by clone(), so cloned
        keyframes (e.g. via CustomCFGKeyframeGroup usage) lost their extras.
        """
        c = CustomCFGKeyframe(cfg_multival=self.cfg_multival,
                              start_percent=self.start_percent, guarantee_steps=self.guarantee_steps,
                              cfg_extras=self.cfg_extras)
        c.start_t = self.start_t
        return c
|
|
|
|
|
class CustomCFGKeyframeGroup:
    """Schedules CustomCFGKeyframes over sampling: tracks which keyframe is
    active for the current timestep and exposes its cfg value/extras."""

    def __init__(self):
        self.keyframes: list[CustomCFGKeyframe] = []
        self._current_keyframe: CustomCFGKeyframe = None  # active keyframe
        self._current_used_steps: int = 0  # steps spent on the active keyframe
        self._current_index: int = 0       # index of the active keyframe
        self._previous_t = -1              # last timestep seen, to dedupe calls

    def reset(self):
        """Restart scheduling from the first keyframe."""
        self._current_keyframe = None
        self._current_used_steps = 0
        self._current_index = 0
        self._set_first_as_current()

    def add(self, keyframe: CustomCFGKeyframe):
        """Insert a keyframe, keeping the list sorted by start_percent."""
        self.keyframes.append(keyframe)
        self.keyframes = get_sorted_list_via_attr(self.keyframes, "start_percent")
        self._set_first_as_current()

    def _set_first_as_current(self):
        # earliest keyframe (lowest start_percent) becomes active, if any exist
        if len(self.keyframes) > 0:
            self._current_keyframe = self.keyframes[0]
        else:
            self._current_keyframe = None

    def has_index(self, index: int) -> bool:
        """True when index is a valid position in the keyframe list."""
        return index >=0 and index < len(self.keyframes)

    def is_empty(self) -> bool:
        """True when no keyframes have been added."""
        return len(self.keyframes) == 0

    def clone(self):
        """Shallow copy: shares keyframe objects, resets scheduling state."""
        cloned = CustomCFGKeyframeGroup()
        for keyframe in self.keyframes:
            cloned.keyframes.append(keyframe)
        cloned._set_first_as_current()
        return cloned

    def initialize_timesteps(self, model: BaseModel):
        """Convert each keyframe's start_percent into a sigma (start_t)."""
        for keyframe in self.keyframes:
            keyframe.start_t = model.model_sampling.percent_to_sigma(keyframe.start_percent)

    def prepare_current_keyframe(self, t: Tensor):
        """Advance the active keyframe to match timestep t (a sigma batch).

        Assumes at least one keyframe exists (callers should check is_empty()).
        Each distinct t counts as one step toward the active keyframe's
        guarantee_steps; later keyframes take over once their start_t has been
        reached and the guarantee of the current one is satisfied.
        """
        curr_t: float = t[0]
        # same timestep as last call (e.g. cond and uncond passes) — do nothing
        if curr_t == self._previous_t:
            return
        prev_index = self._current_index  # NOTE(review): kept but currently unused

        # only consider switching once the active keyframe's guaranteed steps are used up
        if self._current_used_steps >= self._current_keyframe.guarantee_steps:
            if self.has_index(self._current_index+1):
                # scan forward through keyframes whose start_t has been reached
                # (sigmas decrease over sampling, hence start_t >= curr_t)
                for i in range(self._current_index+1, len(self.keyframes)):
                    eval_c = self.keyframes[i]
                    if eval_c.start_t >= curr_t:
                        self._current_index = i
                        self._current_keyframe = eval_c
                        self._current_used_steps = 0
                        # guarantee_steps > 0 pins this keyframe; 0 lets the scan
                        # skip past it to a later eligible keyframe in one call
                        if self._current_keyframe.guarantee_steps > 0:
                            break
                    # keyframes are sorted, so the first not-yet-reached one ends the scan
                    else: break

        self._current_used_steps += 1
        self._previous_t = curr_t

    def get_cfg_scale(self, cond: Tensor):
        """Return the active cfg value; Tensor values are resized/batched to match cond."""
        cond_scale = self.cfg_multival
        if isinstance(cond_scale, Tensor):
            cond_scale = prepare_mask_batch(cond_scale.to(cond.dtype).to(cond.device), cond.shape)
            cond_scale = extend_to_batch_size(cond_scale, cond.shape[0])
        return cond_scale

    def get_model_options(self, model_options: dict[str]):
        """Run the active keyframe's CFG extras over model_options, in order."""
        cfg_extras = self.cfg_extras
        if cfg_extras is not None:
            for extra in cfg_extras.extras:
                model_options = extra.call_fn(model_options)
        return model_options

    def patch_model(self, model: ModelPatcher) -> ModelPatcher:
        """Return a cloned model whose CFG function uses this group's active value."""

        def evolved_custom_cfg(args):
            cond: Tensor = args["cond"]
            uncond: Tensor = args["uncond"]

            # same resizing logic as get_cfg_scale, using the live active keyframe
            cond_scale = self.cfg_multival
            if isinstance(cond_scale, Tensor):
                cond_scale = prepare_mask_batch(cond_scale.to(cond.dtype).to(cond.device), cond.shape)
                cond_scale = extend_to_batch_size(cond_scale, cond.shape[0])
            return uncond + (cond - uncond) * cond_scale

        model = model.clone()
        model.set_model_sampler_cfg_function(evolved_custom_cfg)
        return model

    @property
    def cfg_multival(self):
        # active keyframe's cfg value, or None when no keyframe is active
        if self._current_keyframe != None:
            return self._current_keyframe.cfg_multival
        return None

    @property
    def cfg_extras(self):
        # active keyframe's extras group, or None when no keyframe is active
        if self._current_keyframe != None:
            return self._current_keyframe.cfg_extras
        return None
|
|
|
|
|
class NoisedImageInjectOptions:
    """Placement options (x/y offset) for injecting a noised image into latents."""

    def __init__(self, x=0, y=0):
        self.x = x
        self.y = y

    def clone(self):
        """Return an independent copy with the same offsets."""
        duplicate = NoisedImageInjectOptions(x=self.x, y=self.y)
        return duplicate
|
|
|
|
|
class NoisedImageToInject:
    """An image (with mask, VAE, and options) to encode and inject into the
    latents at a scheduled point during sampling."""

    def __init__(self, image: Tensor, mask: Tensor, vae: VAE, start_percent: float, guarantee_steps: int=1,
                 invert_mask=False, resize_image=True, strength_multival=None,
                 img_inject_opts: NoisedImageInjectOptions=None):
        self.image = image
        self.mask = mask
        self.vae = vae
        self.invert_mask = invert_mask
        self.resize_image = resize_image
        self.strength_multival = 1.0 if strength_multival is None else strength_multival
        if img_inject_opts is None:
            img_inject_opts = NoisedImageInjectOptions()
        self.img_inject_opts = img_inject_opts

        # scheduling: start_t/start_timestep are filled in by initialize_timesteps
        self.start_percent = float(start_percent)
        self.start_t = 999999999.9
        self.start_timestep = 999
        self.guarantee_steps = guarantee_steps

    def clone(self):
        """Return a copy of this injection.

        BUGFIX: clone() previously omitted the required `mask` argument
        (raising TypeError whenever it was called) and also dropped
        `strength_multival`; both are now carried over.
        """
        cloned = NoisedImageToInject(image=self.image, mask=self.mask, vae=self.vae, start_percent=self.start_percent,
                                     guarantee_steps=self.guarantee_steps, invert_mask=self.invert_mask, resize_image=self.resize_image,
                                     strength_multival=self.strength_multival, img_inject_opts=self.img_inject_opts)
        cloned.start_t = self.start_t
        cloned.start_timestep = self.start_timestep
        return cloned
|
|
|
|
|
class NoisedImageToInjectGroup:
    """Schedules NoisedImageToInject objects: splits the sampling step/sigma
    range into segments, with one image injection between adjacent segments."""

    def __init__(self):
        self.injections: list[NoisedImageToInject] = []
        self._current_index: int = -1      # index of the most recently applied injection
        self._current_used_steps: int = 0  # steps used by the current injection

    @property
    def current_injection(self):
        # injection at the current index; assumes _current_index is valid
        return self.injections[self._current_index]

    def reset(self):
        """Restart injection scheduling from the beginning."""
        self._current_index = -1
        self._current_used_steps: int = 0

    def add(self, to_inject: NoisedImageToInject):
        """Insert an injection, keeping the list sorted by start_percent."""
        self.injections.append(to_inject)
        self.injections = get_sorted_list_via_attr(self.injections, "start_percent")

    def is_empty(self) -> bool:
        """True when no injections have been added."""
        return len(self.injections) == 0

    def has_index(self, index: int) -> bool:
        """True when index is a valid position in the injections list."""
        return index >=0 and index < len(self.injections)

    def clone(self):
        """Shallow copy: shares the injection objects, resets scheduling state."""
        cloned = NoisedImageToInjectGroup()
        for to_inject in self.injections:
            cloned.injections.append(to_inject)
        return cloned

    def initialize_timesteps(self, model: BaseModel):
        """Convert each injection's start_percent into a sigma and a timestep."""
        for to_inject in self.injections:
            to_inject.start_t = model.model_sampling.percent_to_sigma(to_inject.start_percent)
            to_inject.start_timestep = model.model_sampling.timestep(torch.tensor(to_inject.start_t))

    def ksampler_get_injections(self, model: ModelPatcher, scheduler: str, sampler_name: str, denoise: float, force_full_denoise: bool, start_step: int, last_step: int, total_steps: int) -> tuple[list[list[int]], list[NoisedImageToInject]]:
        """Split a KSampler step range into segments around each injection.

        Returns ([start, end] step pairs, injections between those segments).
        """
        actual_last_step = min(last_step, total_steps)
        steps = list(range(start_step, actual_last_step+1))

        # build a throwaway sampler just to obtain the sigma schedule
        sampler = comfy.samplers.KSampler(model, steps=total_steps, device=model.load_device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)

        # trim sigmas the same way KSampler.sample does for start/last step
        sigmas = sampler.sigmas
        if last_step is not None and last_step < (len(sigmas) - 1):
            sigmas = sigmas[:last_step + 1]
            if force_full_denoise:
                sigmas[-1] = 0
        if start_step is not None:
            if start_step < (len(sigmas) - 1):
                sigmas = sigmas[start_step:]
            else:
                # nothing left to sample; single empty-range segment, no injections
                return [[start_step,actual_last_step], []]
        assert len(steps) == len(sigmas)
        model_sampling = model.get_model_object("model_sampling")
        timesteps = [model_sampling.timestep(x) for x in sigmas]

        ranges, injections = self._prepare_injections(timesteps=timesteps)

        # convert index ranges back into inclusive [first_step, last_step] pairs
        steps_list = [[steps[x[0]],steps[x[1]-1]] for x in ranges]
        return steps_list, injections

    def custom_ksampler_get_injections(self, model: ModelPatcher, sigmas: Tensor) -> tuple[list[list[Tensor]], list[NoisedImageToInject]]:
        """Split an explicit sigma schedule into segments around each injection.

        Returns (list of sigma sub-tensors, injections between those segments).
        """
        model_sampling = model.get_model_object("model_sampling")
        timesteps = []
        for i in range(sigmas.shape[0]):
            timesteps.append(model_sampling.timestep(sigmas[i]))

        ranges, injections = self._prepare_injections(timesteps=timesteps)
        sigmas_list = [sigmas[x[0]:x[1]] for x in ranges]
        return sigmas_list, injections

    def _prepare_injections(self, timesteps: list[Tensor]) -> tuple[list[list[Tensor]], list[NoisedImageToInject]]:
        """Partition the timestep list into index ranges separated by injections.

        Returns (ranges, injections) where ranges are (start, end) index pairs
        into `timesteps` (end exclusive) and len(ranges) == len(injections)+1;
        each injection is applied between its surrounding ranges. Timesteps are
        assumed to be in decreasing order (denoising direction).
        """
        range_start = timesteps[0]
        range_end = timesteps[-1]

        # no injections: one segment covering everything
        if self.is_empty():
            return ([(0, len(timesteps))], [])

        timesteps_list: list[list[Tensor]] = []
        injection_list: list[NoisedImageToInject] = []
        remaining_timesteps = timesteps.copy()
        remaining_offset = 0  # index of remaining_timesteps[0] within the original list

        for eval_c in self.injections:
            # need at least 3 timesteps to split off a new segment
            if len(remaining_timesteps) <= 2:
                break
            current_used_steps = 0

            # injection scheduled before this sampling range begins — skip
            if eval_c.start_timestep > range_start:
                continue

            # injection scheduled after this sampling range ends — skip
            if eval_c.start_timestep < range_end:
                continue
            # re-apply the same injection up to guarantee_steps times
            while current_used_steps < eval_c.guarantee_steps:
                if len(remaining_timesteps) <= 2:
                    break

                broken_nicely = False
                # find the first remaining timestep at/below the injection's start
                # (skip the endpoints so both sides keep at least one timestep)
                for i in range(1, len(remaining_timesteps)-1):
                    if eval_c.start_timestep < remaining_timesteps[i]:
                        continue

                    # too close to the end to form another segment
                    if len(remaining_timesteps[i:]) < 2:
                        broken_nicely = True
                        break
                    # close current segment (inclusive overlap of one timestep)
                    new_timestep_range = (remaining_offset, remaining_offset+i+1)
                    timesteps_list.append(new_timestep_range)
                    injection_list.append(eval_c)
                    current_used_steps += 1
                    remaining_timesteps = remaining_timesteps[i:]
                    remaining_offset += i

                    broken_nicely = True
                    break

                # inner for-loop found no split point — stop retrying this injection
                if not broken_nicely:
                    break

        # final segment: everything left after the last injection
        timesteps_list.append((remaining_offset, remaining_offset+len(remaining_timesteps)))

        assert len(timesteps_list) == len(injection_list) + 1
        return timesteps_list, injection_list
|
|