jokerbit
/

flux-qa-5

Model card Files and versions Community

jokerbit committed on Feb 13

Commit

1f060f6

·

verified ·

1 Parent(s): 62d547b

Update src/pipeline.py

Files changed (1) hide show

src/pipeline.py +3 -3

src/pipeline.py CHANGED Viewed

@@ -11,7 +11,7 @@ from torch import Generator
 from torchao.quantization import quantize_, int8_weight_only
 from transformers import T5EncoderModel, CLIPTextModel, logging
 from functools import partial
 my_partial_compile = partial(torch.compile, mode="max-autotune")
 Pipeline: TypeAlias = FluxPipeline
@@ -52,9 +52,9 @@ def load_pipeline() -> Pipeline:
     ).to("cuda")
     pipeline.to(memory_format=torch.channels_last)
-    quantize_(pipeline.vae, int8_weight_only())
     pipeline.vae = my_partial_compile(pipeline.vae)
-    pipeline.transformer = torch.compile(pipeline.transformer)
     with torch.inference_mode():
         for _ in range(2):
             pipeline("cats running on a road with a dog chasing", num_inference_steps=4)

 from torchao.quantization import quantize_, int8_weight_only
 from transformers import T5EncoderModel, CLIPTextModel, logging
 from functools import partial
+from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe
 my_partial_compile = partial(torch.compile, mode="max-autotune")
 Pipeline: TypeAlias = FluxPipeline
     ).to("cuda")
     pipeline.to(memory_format=torch.channels_last)
+    # quantize_(pipeline.vae, int8_weight_only())
     pipeline.vae = my_partial_compile(pipeline.vae)
+    apply_cache_on_pipe(pipeline, residual_diff_threshold=0.25)
     with torch.inference_mode():
         for _ in range(2):
             pipeline("cats running on a road with a dog chasing", num_inference_steps=4)