SDXL_Finetune_GGUF_Files / convert_mod.py

Rename convert.py to convert_mod.py

708634e verified 7 days ago

10 kB

	# (c) City96 || Apache-2.0 (apache.org/licenses/LICENSE-2.0)
	import os
	import torch
	import gguf # This needs to be the llama.cpp one specifically!
	import argparse
	from tqdm import tqdm

	from safetensors.torch import load_file

	# float32/bfloat16 tensors with at most this many elements are written as F32
	# (the "very small tensors" branch in handle_tensors).
	QUANTIZATION_THRESHOLD = 1024
	# Minimum element count a tensor needs before handle_tensors will reshape it
	# to (-1, 256) for architectures that set shape_fix.
	REARRANGE_THRESHOLD = 512
	# Longest tensor name handle_tensors accepts; longer names raise ValueError.
	MAX_TENSOR_NAME_LENGTH = 127

	class ModelTemplate:
	    """Base description of an architecture this script can recognise.

	    Subclasses override the class attributes below. is_model_arch() reads
	    keys_detect and keys_banned, handle_tensors() reads shape_fix, and
	    load_model() passes arch to the GGUF writer.
	    """
	    arch = "invalid" # string describing architecture
	    shape_fix = False # whether to reshape tensors
	    # list of key groups; the model matches when every key of any one group is in the state dict
	    keys_detect = [] # list of lists to match in state dict
	    keys_banned = [] # list of keys that should mark model as invalid for conversion

	class ModelFlux(ModelTemplate):
	    """Detection keys for the "flux" architecture.

	    The key in the first detection group is also listed in keys_banned, so a
	    state dict containing it is recognised as flux but rejected by
	    is_model_arch(). Only the second group leads to a successful conversion.
	    """
	    arch = "flux"
	    keys_detect = [
	        # NOTE(review): presumably the diffusers-format layout, going by the
	        # rejection message in is_model_arch -- confirm.
	        ("transformer_blocks.0.attn.norm_added_k.weight",),
	        ("double_blocks.0.img_attn.proj.weight",),
	    ]
	    keys_banned = ["transformer_blocks.0.attn.norm_added_k.weight",]

	class ModelSD3(ModelTemplate):
	    """Detection keys for the "sd3" architecture.

	    The key in the first detection group is also banned, so state dicts that
	    contain it are detected and then rejected by is_model_arch(). Only the
	    second group leads to a successful conversion.
	    """
	    arch = "sd3"
	    keys_detect = [
	        # NOTE(review): presumably the diffusers-format layout -- confirm.
	        ("transformer_blocks.0.attn.add_q_proj.weight",),
	        ("joint_blocks.0.x_block.attn.qkv.weight",),
	    ]
	    keys_banned = ["transformer_blocks.0.attn.add_q_proj.weight",]

	class ModelAura(ModelTemplate):
	    """Detection keys for the "aura" architecture.

	    Here the banned key is the one in the *second* detection group: a state
	    dict matching only that group is detected and then rejected by
	    is_model_arch(). A state dict matching the first group is accepted as long
	    as the banned key is absent.
	    """
	    arch = "aura"
	    keys_detect = [
	        ("double_layers.3.modX.1.weight",),
	        # NOTE(review): presumably the diffusers-format layout -- confirm.
	        ("joint_transformer_blocks.3.ff_context.out_projection.weight",),
	    ]
	    keys_banned = ["joint_transformer_blocks.3.ff_context.out_projection.weight",]

	class ModelLTXV(ModelTemplate):
	    """Detection keys for the "ltxv" architecture.

	    There is a single detection group; all three keys must be present for a
	    match. No keys are banned (keys_banned is inherited as an empty list).
	    """
	    arch = "ltxv"
	    keys_detect = [
	        (
	            "adaln_single.emb.timestep_embedder.linear_2.weight",
	            "transformer_blocks.27.scale_shift_table",
	            "caption_projection.linear_2.weight",
	        )
	    ]

	class ModelSDXL(ModelTemplate):
	    """Detection keys for the "sdxl" architecture.

	    shape_fix is enabled, so handle_tensors() may reshape eligible tensors to
	    (-1, 256) and record their original shape. Any one of the three key groups
	    is enough for a match; no keys are banned.
	    """
	    arch = "sdxl"
	    shape_fix = True
	    keys_detect = [
	        # Both keys are required; the first one alone is also ModelSD1's first group.
	        ("down_blocks.0.downsamplers.0.conv.weight", "add_embedding.linear_1.weight",),
	        (
	            "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight",
	            "output_blocks.2.2.conv.weight", "output_blocks.5.2.conv.weight",
	        ), # Non-diffusers
	        ("label_emb.0.0.weight",),
	    ]

	class ModelSD1(ModelTemplate):
	    """Detection keys for the "sd1" architecture.

	    shape_fix is enabled, so handle_tensors() may reshape eligible tensors to
	    (-1, 256) and record their original shape. Either key group is enough for
	    a match; no keys are banned.
	    """
	    arch = "sd1"
	    shape_fix = True
	    keys_detect = [
	        # This key is also part of ModelSDXL's first group, so detection
	        # relies on ModelSDXL being tried first.
	        ("down_blocks.0.downsamplers.0.conv.weight",),
	        (
	            "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight", "input_blocks.9.0.op.weight",
	            "output_blocks.2.1.conv.weight", "output_blocks.5.2.conv.weight", "output_blocks.8.2.conv.weight"
	        ), # Non-diffusers
	    ]

	class ModelClipG(ModelTemplate):
	    """Detection keys for the "clip_g" architecture.

	    All detection keys live under "conditioner.embedders.1.model.". Either
	    group is enough for a match. shape_fix is inherited as False, so tensors
	    of this architecture are never reshaped by handle_tensors().
	    """
	    arch = "clip_g"
	    keys_detect = [
	        ("conditioner.embedders.1.model.ln_final.bias",), # Final layer normalization bias
	        (
	            "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight", # Attention input projection weight
	            "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight", # Attention input projection weight for another block
	        ),
	    ]
	    # Explicitly empty: no key marks a clip_g state dict as invalid.
	    keys_banned = [] # Add any banned keys if necessary

	# The architectures are checked in order and the first successful match terminates the search.
	# Order matters: any state dict that satisfies ModelSDXL's first key group also
	# satisfies ModelSD1's first group, so ModelSDXL has to come before ModelSD1.
	arch_list = [ModelFlux, ModelSD3, ModelAura, ModelLTXV, ModelSDXL, ModelSD1, ModelClipG]

	def is_model_arch(model, state_dict):
	    """Check whether ``state_dict`` belongs to the architecture ``model``.

	    The key groups in ``model.keys_detect`` are tried in order. The first
	    group whose keys are all present in ``state_dict`` decides the result;
	    later groups are not examined.

	    Args:
	        model: object exposing ``keys_detect`` (iterable of key groups) and
	            ``keys_banned`` (iterable of keys).
	        state_dict: mapping (or any container supporting ``in``) of tensor
	            names.

	    Returns:
	        True if some detection group matched, False otherwise.

	    Raises:
	        AssertionError: a detection group matched but ``state_dict`` also
	            contains one of ``model.keys_banned``.
	    """
	    for match_list in model.keys_detect:
	        print(f"Checking match list: {match_list}")
	        if not all(key in state_dict for key in match_list):
	            continue

	        print(f"Match found for {match_list}")
	        if any(key in state_dict for key in model.keys_banned):
	            # Raised explicitly instead of via an ``assert`` statement so the
	            # check is not stripped when Python runs with -O.
	            raise AssertionError("Model architecture not allowed for conversion! (i.e. reference VS diffusers format)")
	        return True

	    return False

	def detect_arch(state_dict):
	    """Return the first architecture in ``arch_list`` that matches ``state_dict``.

	    Args:
	        state_dict: mapping of tensor names, as accepted by is_model_arch().

	    Returns:
	        The matching architecture class from ``arch_list``.

	    Raises:
	        AssertionError: no architecture matched, or is_model_arch() rejected
	            the state dict because it contains a banned key.
	    """
	    for arch in arch_list:
	        if is_model_arch(arch, state_dict):
	            return arch

	    # Raised explicitly instead of via an ``assert`` statement so that an
	    # unknown model cannot slip through as ``None`` when Python runs with -O.
	    raise AssertionError("Unknown model architecture!")

	def parse_args():
	    """Read the command line and make sure the source file exists.

	    Returns:
	        The argparse namespace with ``src`` (required) and ``dst`` (None when
	        not given).

	    The process exits through ``ArgumentParser.error`` when ``--src`` does not
	    point at an existing file.
	    """
	    cli = argparse.ArgumentParser(description="Generate F16 GGUF files from single UNET")
	    cli.add_argument("--src", required=True, help="Source model ckpt file.")
	    cli.add_argument("--dst", help="Output unet gguf file.")
	    options = cli.parse_args()

	    source_exists = os.path.isfile(options.src)
	    if not source_exists:
	        # error() prints the usage text plus this message and exits.
	        cli.error("No input provided!")

	    return options

	def load_state_dict(path):
	    """Load a checkpoint and return its UNET weights with the prefix removed.

	    Files ending in .ckpt/.pt/.bin/.pth are read with ``torch.load`` (weights
	    only, on CPU); if the loaded object has a "model" entry, that entry is
	    used as the state dict. Every other file is read with safetensors'
	    ``load_file``.

	    If any key starts with "model.diffusion_model." (checked first) or
	    "model.", only the keys starting with that prefix are kept and the
	    prefix is stripped from the front of each name. If neither prefix occurs,
	    all entries are returned unchanged.

	    Args:
	        path: path of the checkpoint file.

	    Returns:
	        A new dict mapping tensor name to tensor.
	    """
	    if path.endswith((".ckpt", ".pt", ".bin", ".pth")):
	        state_dict = torch.load(path, map_location="cpu", weights_only=True)
	        state_dict = state_dict.get("model", state_dict)
	    else:
	        state_dict = load_file(path)

	    # only keep unet with no prefix!
	    prefix = None
	    for pfx in ("model.diffusion_model.", "model."):
	        if any(key.startswith(pfx) for key in state_dict):
	            prefix = pfx
	            break

	    if prefix is None:
	        return dict(state_dict)

	    # Match and strip the prefix at the start of the name only. A substring
	    # test plus str.replace would also keep keys that merely contain the
	    # prefix (e.g. "....model.ln_final.bias") and would delete later
	    # occurrences of it inside a name.
	    cut = len(prefix)
	    return {
	        key[cut:]: value
	        for key, value in state_dict.items()
	        if key.startswith(prefix)
	    }

	def load_model(path):
	    """Load the weights at ``path`` and prepare a GGUF writer for them.

	    Returns:
	        A ``(writer, state_dict, model_arch)`` tuple: a ``gguf.GGUFWriter``
	        created without an output path and tagged with the detected
	        architecture name, the loaded state dict, and the architecture class.
	    """
	    weights = load_state_dict(path)
	    detected = detect_arch(weights)
	    print(f"* Architecture detected from input: {detected.arch}")

	    # The output path is supplied later, when the header is written.
	    gguf_writer = gguf.GGUFWriter(path=None, arch=detected.arch)
	    return gguf_writer, weights, detected

	def handle_tensors(args, writer, state_dict, model_arch):
	    """Convert every tensor in ``state_dict`` and queue it on ``writer``.

	    For each tensor the target type is BF16 when the source dtype is
	    bfloat16 and F16 otherwise. float32/bfloat16 tensors are promoted to F32
	    when they are one-dimensional, have at most QUANTIZATION_THRESHOLD
	    elements, or are ".weight" tensors whose name contains one of the
	    blacklist fragments. When ``model_arch.shape_fix`` is set, eligible
	    tensors are reshaped to (-1, 256) and their original shape is stored via
	    ``writer.add_array`` under "comfy.gguf.orig_shape.<name>".

	    Args:
	        args: Not used by this function; kept for interface compatibility.
	        writer: object providing ``add_array`` and ``add_tensor``
	            (a ``gguf.GGUFWriter`` in this script).
	        state_dict: mapping of tensor name to torch tensor. An empty mapping
	            is a no-op.
	        model_arch: architecture class; only ``shape_fix`` is read.

	    Raises:
	        ValueError: a tensor name is longer than MAX_TENSOR_NAME_LENGTH.
	    """
	    if not state_dict:
	        return

	    max_name_len = max(len(key) for key in state_dict)
	    if max_name_len > MAX_TENSOR_NAME_LENGTH:
	        # Longest names first; the sort is stable so equal lengths keep
	        # their state-dict order.
	        offenders = sorted(
	            (key for key in state_dict if len(key) > MAX_TENSOR_NAME_LENGTH),
	            key=len,
	            reverse=True,
	        )
	        bad_list = ", ".join(f"{key!r} ({len(key)})" for key in offenders)
	        raise ValueError(f"Can only handle tensor names up to {MAX_TENSOR_NAME_LENGTH} characters. Tensors exceeding the limit: {bad_list}")

	    # --- loop invariants, built once instead of once per tensor ---

	    # keys to keep as max precision
	    blacklist = (
	        "time_embedding.",
	        "add_embedding.",
	        "time_in.",
	        "txt_in.",
	        "vector_in.",
	        "img_in.",
	        "guidance_in.",
	        "final_layer.",
	    )
	    # getattr with a placeholder default: this is so we don't break torch 2.0.X
	    float8_dtypes = [
	        getattr(torch, "float8_e4m3fn", "_invalid"),
	        getattr(torch, "float8_e5m2", "_invalid"),
	    ]
	    f32_qtype = gguf.GGMLQuantizationType.F32

	    for key, data in tqdm(state_dict.items()):
	        old_dtype = data.dtype
	        is_bf16 = old_dtype == torch.bfloat16

	        # Convert to numpy, going through a dtype torch can export first.
	        if is_bf16:
	            data = data.to(torch.float32).numpy()
	        elif old_dtype in float8_dtypes:
	            data = data.to(torch.float16).numpy()
	        else:
	            data = data.numpy()

	        n_dims = len(data.shape)
	        # get number of parameters (AKA elements) in this tensor
	        n_params = int(data.size)

	        if is_bf16:
	            data_qtype = gguf.GGMLQuantizationType.BF16
	        else:
	            data_qtype = gguf.GGMLQuantizationType.F16

	        if old_dtype in (torch.float32, torch.bfloat16):
	            if (
	                n_dims == 1  # 1-D tensors stay F32; also avoids dequantizing at inference
	                or n_params <= QUANTIZATION_THRESHOLD  # very small tensors
	                or (".weight" in key and any(x in key for x in blacklist))
	            ):
	                data_qtype = f32_qtype

	        if (
	            model_arch.shape_fix  # NEVER reshape for models such as flux
	            and n_dims > 1  # Skip one-dimensional tensors
	            and n_params >= REARRANGE_THRESHOLD  # Only rearrange tensors meeting the size requirement
	            and n_params % 256 == 0  # Rearranging only makes sense if total elements is divisible by 256
	            and data.shape[-1] % 256 != 0  # Only needed if the last dimension is not divisible by 256
	        ):
	            orig_shape = data.shape
	            data = data.reshape(n_params // 256, 256)
	            # Record the original shape under a per-tensor metadata key.
	            writer.add_array(f"comfy.gguf.orig_shape.{key}", tuple(int(dim) for dim in orig_shape))

	        try:
	            data = gguf.quants.quantize(data, data_qtype)
	        except (AttributeError, gguf.QuantError) as e:
	            tqdm.write(f"falling back to F16: {e}")
	            data_qtype = gguf.GGMLQuantizationType.F16
	            data = gguf.quants.quantize(data, data_qtype)

	        new_name = key # do we need to rename?

	        # Dimensions are printed in reverse order, wrapped in braces.
	        shape_str = "{" + ", ".join(str(n) for n in reversed(data.shape)) + "}"
	        tqdm.write(f"{new_name:<{max_name_len + 4}} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")

	        writer.add_tensor(new_name, data, raw_dtype=data_qtype)

	if __name__ == "__main__":
	    # Script entry point: load the source model, convert its tensors and
	    # write the resulting GGUF file.
	    args = parse_args()
	    path = args.src
	    writer, state_dict, model_arch = load_model(path)

	    writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
	    # The dtype of the first tensor alone decides the file-type tag and the
	    # suffix of the default output name.
	    if next(iter(state_dict.values())).dtype == torch.bfloat16:
	        out_path = f"{os.path.splitext(path)[0]}-BF16.gguf"
	        writer.add_file_type(gguf.LlamaFileType.MOSTLY_BF16)
	    else:
	        out_path = f"{os.path.splitext(path)[0]}-F16.gguf"
	        writer.add_file_type(gguf.LlamaFileType.MOSTLY_F16)

	    # An explicit --dst overrides the name derived from the source path.
	    out_path = args.dst or out_path
	    if os.path.isfile(out_path):
	        # Blocks until the user confirms; ctrl+c aborts before anything is written.
	        input("Output exists enter to continue or ctrl+c to abort!")

	    # Pass the parsed arguments: the first parameter of handle_tensors is
	    # ``args``, not the source path.
	    handle_tensors(args, writer, state_dict, model_arch)
	    writer.write_header_to_file(path=out_path)
	    writer.write_kv_data_to_file()
	    writer.write_tensors_to_file(progress=True)
	    writer.close()