Update src/pipeline.py
src/pipeline.py  CHANGED  (+3 -2)
@@ -15,7 +15,7 @@ from diffusers import (
 )
 from transformers import T5EncoderModel
 from huggingface_hub.constants import HF_HUB_CACHE
-from torchao.quantization import quantize_, int8_weight_only
+from torchao.quantization import quantize_, int8_weight_only, float8_weight_only
 from first_block_cache.diffusers_adapters import apply_cache_on_pipe
 from pipelines.models import TextToImageRequest
 from torch import Generator
@@ -100,7 +100,7 @@ class PipelineManager:
             torch_dtype=Config.DTYPE
         ).to(memory_format=torch.channels_last)
         vae = AutoencoderTiny.from_pretrained("RobertML/FLUX.1-schnell-vae_e3m2", revision="da0d2cd7815792fb40d084dbd8ed32b63f153d8d", torch_dtype=Config.DTYPE)
-        vae.encoder=_load(vae.encoder, "E", dtype=torch.bfloat16); vae.decoder=_load(vae.decoder, "D", dtype=torch.bfloat16)
+        # vae.encoder=_load(vae.encoder, "E", dtype=torch.bfloat16); vae.decoder=_load(vae.decoder, "D", dtype=torch.bfloat16)
 
         path = os.path.join(HF_HUB_CACHE, "models--RobertML--FLUX.1-schnell-int8wo/snapshots/307e0777d92df966a3c0f99f31a6ee8957a9857a")
         model = FluxTransformer2DModel.from_pretrained(
@@ -122,6 +122,7 @@ class PipelineManager:
         pipeline.to(memory_format=torch.channels_last)
         pipeline.vae = torch.compile(pipeline.vae, mode="max-autotune")
         quantize_(pipeline.vae, int8_weight_only())
+        quantize_(pipeline.vae, float8_weight_only())
         PipelineManager._warmup(pipeline)
 
         return pipeline
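For reference, torchao's quantize_ mutates the target module in place: eligible nn.Linear weights are swapped for quantized tensor subclasses that dequantize on the fly during matmul, so the call needs no reassignment. Below is a minimal sketch of the pattern this commit adds, assuming a recent PyTorch and a torchao build that exports float8_weight_only; the toy module is a stand-in for pipeline.vae, not the real AutoencoderTiny.

import torch
from torchao.quantization import quantize_, float8_weight_only

# Stand-in for pipeline.vae: any module containing nn.Linear layers.
toy_vae = torch.nn.Sequential(
    torch.nn.Linear(64, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 64),
)

# In-place weight-only quantization: weights are stored in float8 and
# dequantized during the forward pass; activations stay in high precision.
quantize_(toy_vae, float8_weight_only())

out = toy_vae(torch.randn(1, 64))

Note that the diff issues the float8 call right after quantize_(pipeline.vae, int8_weight_only()); whether a second weight-only pass over already-quantized Linear weights is a no-op, a re-quantization, or an error depends on the torchao version, so the sketch above shows a single pass only.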