Disty0
/

FLUX.1-dev-qint8

image-generation

Inference Endpoints

Model card Files Files and versions Community

Disty0 committed on Aug 11, 2024

Commit

b3e4533

·

verified ·

1 Parent(s): 1d3c711

Update README.md

Files changed (1) hide show

README.md +57 -0

README.md CHANGED Viewed

@@ -9,3 +9,60 @@ tags:
 - image-generation
 - flux
 ---

 - image-generation
 - flux
 ---
+Quantized to INT8 using Optimum Quanto.
+```shell
+pip install diffusers optimum-quanto
+```
+```python
+import json
+import torch
+import diffusers
+import transformers
+from optimum.quanto import requantize
+from safetensors.torch import load_file
+from huggingface_hub import hf_hub_download
+def load_quanto_transformer(repo_path):
+    with open(hf_hub_download(repo_path, "transformer/quantization_map.json"), "r") as f:
+        quantization_map = json.load(f)
+    with torch.device("meta"):
+        transformer = diffusers.FluxTransformer2DModel.from_config(hf_hub_download(repo_path, "transformer/config.json")).to(torch.bfloat16)
+    state_dict = load_file(hf_hub_download(repo_path, "transformer/diffusion_pytorch_model.safetensors"))
+    requantize(transformer, state_dict, quantization_map, device=torch.device("cpu"))
+    return transformer
+def load_quanto_text_encoder_2(repo_path):
+    with open(hf_hub_download(repo_path, "text_encoder_2/quantization_map.json"), "r") as f:
+        quantization_map = json.load(f)
+    with open(hf_hub_download(repo_path, "text_encoder_2/config.json")) as f:
+        t5_config = transformers.T5Config(**json.load(f))
+    with torch.device("meta"):
+        text_encoder_2 = transformers.T5EncoderModel(t5_config).to(torch.bfloat16)
+    state_dict = load_file(hf_hub_download(repo_path, "text_encoder_2/model.safetensors"))
+    requantize(text_encoder_2, state_dict, quantization_map, device=torch.device("cpu"))
+    return text_encoder_2
+pipe = diffusers.AutoPipelineForText2Image.from_pretrained("Disty0/FLUX.1-dev-qint8", transformer=None, text_encoder_2=None, torch_dtype=torch.bfloat16)
+pipe.transformer = load_quanto_transformer("Disty0/FLUX.1-dev-qint8")
+pipe.text_encoder_2 = load_quanto_text_encoder_2("Disty0/FLUX.1-dev-qint8")
+pipe = pipe.to("cuda", dtype=torch.bfloat16)
+prompt = "A cat holding a sign that says hello world"
+image = pipe(
+    prompt,
+    height=1024,
+    width=1024,
+    guidance_scale=3.5,
+    num_inference_steps=50,
+    max_sequence_length=512,
+    generator=torch.Generator("cpu").manual_seed(0)
+).images[0]
+image.save("flux-dev.png")
+```