How to run a Flux gguf model in Python
Hello,
I'm trying to use the quantized version of Flux-dev, but all the documentation I find online uses ComfyUI.
Is there any guidance on running the model in Python? (I'm used to working with llama.cpp, but that's more for text2text models and not image variations.)
Any help is much appreciated
Thanks
It is not entirely clear how to use the local model (downloaded). If I try to specify ckpt_path as a local directory, I get an error. Can you give me a simple code example?
Probably a bit late for the OP, but here's a basic script for running a GGUF model using diffusers:
from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig
import torch
# Text prompt that drives the image generation.
prompt = "a moonim dressed as a knight, riding a horse towards a medieval castle"

# Path to the GGUF-quantized transformer weights on local disk.
# Alternative left commented out by the original author (a Hub URL):
#   "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q8_0.gguf"
ckpt_path = "/Volumes/SSD2TB/AI/caches/models/flux1-dev-Q8_0.gguf"

# Load only the transformer from the single GGUF file; the quantization
# config asks for bfloat16 as the compute dtype.
gguf_config = GGUFQuantizationConfig(compute_dtype=torch.bfloat16)
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=gguf_config,
    torch_dtype=torch.bfloat16,
)

# Build the pipeline from the FLUX.1-dev repo, passing in the GGUF
# transformer loaded above, then move the whole pipeline to the GPU.
pipeline = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipeline = pipeline.to("cuda")

# Output resolution in pixels.
height = 1024
width = 1024

# Fixed seed so repeated runs use the same starting noise.
seeded_generator = torch.Generator("cuda").manual_seed(42)

result = pipeline(
    prompt=prompt,
    num_inference_steps=15,
    guidance_scale=5.0,
    height=height,
    width=width,
    generator=seeded_generator,
)

# The pipeline output carries a list of images; keep the first one.
images = result.images[0]
images.save("gguf_image.png")
For Mac users, a couple of modifications:
from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig
import torch
# Set the MPS per-process memory fraction to 0.0.
# NOTE(review): PyTorch documents a fraction of 0 as "unlimited allocations"
# (i.e. no allocator cap) -- confirm against the torch version in use.
torch.mps.set_per_process_memory_fraction(0.0)

# Text prompt that drives the image generation.
prompt = "a moonim dressed as a knight, riding a horse towards a medieval castle"

# Path to the GGUF-quantized transformer weights on local disk.
# Alternative left commented out by the original author (a Hub URL):
#   "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q8_0.gguf"
ckpt_path = "/Volumes/SSD2TB/AI/caches/models/flux1-dev-Q8_0.gguf"

# Load only the transformer from the single GGUF file; the quantization
# config asks for bfloat16 as the compute dtype.
gguf_config = GGUFQuantizationConfig(compute_dtype=torch.bfloat16)
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=gguf_config,
    torch_dtype=torch.bfloat16,
)

# Build the pipeline from the FLUX.1-dev repo, passing in the GGUF
# transformer loaded above, then move the whole pipeline to the MPS device.
pipeline = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipeline = pipeline.to("mps")

# Output resolution in pixels.
height = 1024
width = 1024

# Fixed seed so repeated runs use the same starting noise.
seeded_generator = torch.Generator("mps").manual_seed(42)

result = pipeline(
    prompt=prompt,
    num_inference_steps=15,
    guidance_scale=5.0,
    height=height,
    width=width,
    generator=seeded_generator,
)

# The pipeline output carries a list of images; keep the first one.
images = result.images[0]
images.save("gguf_image.png")
Or, an alternative for Mac users with model unloading. It runs better on lower-memory configurations, even though macOS does a pretty good job of swapping out the parts of a model
that it's done with.
from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig
import torch
import gc
# Two-stage FLUX generation on Apple Silicon (MPS):
#   1. load the pipeline without transformer/VAE and encode the prompt,
#   2. drop that pipeline, then load the GGUF transformer (plus the rest of
#      the pipeline, minus text encoders/tokenizers) and run denoising from
#      the precomputed embeddings.
# Only one of the two heavy halves is held in memory at a time.

# Set the MPS per-process memory fraction to 0.0.
# NOTE(review): PyTorch documents a fraction of 0 as "unlimited allocations"
# (i.e. no allocator cap) -- confirm against the torch version in use.
torch.mps.set_per_process_memory_fraction(0.0)


def flush():
    """Run garbage collection and empty the MPS cache, twice over.

    NOTE(review): the original post lost its indentation, so the exact extent
    of this function body is a reconstruction; the collect/empty pair is
    repeated exactly as the four statements appeared in the post.
    """
    gc.collect()
    torch.mps.empty_cache()
    gc.collect()
    torch.mps.empty_cache()


prompt = "a moonim dressed as a knight, riding a horse towards a medieval castle"
ckpt_id = "black-forest-labs/FLUX.1-dev"

# Stage 1: prompt encoding. The transformer and VAE are not needed for this,
# so they are explicitly left out when building the pipeline.
pipeline = FluxPipeline.from_pretrained(
    ckpt_id,
    transformer=None,
    vae=None,
    torch_dtype=torch.bfloat16,
).to("mps")

with torch.no_grad():
    print("Encoding prompts.")
    # encode_prompt returns three values; the third (text ids) is not used
    # by the rest of this script.
    prompt_embeds, pooled_prompt_embeds, _ = pipeline.encode_prompt(
        prompt=prompt, prompt_2=prompt, max_sequence_length=256
    )

# Drop the encoding pipeline before loading the transformer.
del pipeline
flush()

# Stage 2: denoising with the GGUF-quantized transformer.
# Alternative left by the original author (a Hub URL instead of a local file):
#   "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q8_0.gguf"
ckpt_path = "/Volumes/SSD2TB/AI/caches/models/flux1-dev-Q8_0.gguf"
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)

# Text encoders and tokenizers are left out here because the prompt
# embeddings were already computed in stage 1.
pipeline = FluxPipeline.from_pretrained(
    ckpt_id,
    text_encoder=None,
    text_encoder_2=None,
    tokenizer=None,
    tokenizer_2=None,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
).to("mps")

print("Running denoising.")
height, width = 1024, 1024
# No need to wrap it up under `torch.no_grad()` as pipeline call method
# is already wrapped under that.
images = pipeline(
    prompt_embeds=prompt_embeds,
    pooled_prompt_embeds=pooled_prompt_embeds,
    num_inference_steps=15,
    guidance_scale=5.0,
    height=height,
    width=width,
    generator=torch.Generator("mps").manual_seed(42),
).images[0]  # the pipeline output carries a list of images; keep the first
images.save("compile_image.png")