jokerbit committed on
Commit
95e723e
·
verified ·
1 Parent(s): d23bb35

Upload src/pipeline.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/pipeline.py +14 -6
src/pipeline.py CHANGED
@@ -9,11 +9,17 @@ from huggingface_hub.constants import HF_HUB_CACHE
9
  from pipelines.models import TextToImageRequest
10
  from torch import Generator
11
  from torchao.quantization import quantize_, int8_weight_only
12
- from transformers import T5EncoderModel, CLIPTextModel
13
 
14
 
15
  Pipeline: TypeAlias = FluxPipeline
16
  torch.backends.cudnn.benchmark = True
 
 
 
 
 
 
17
 
18
  CHECKPOINT = "jokerbit/flux.1-schnell-Robert-int8wo"
19
  REVISION = "5ef0012f11a863e5111ec56540302a023bc8587b"
@@ -41,12 +47,14 @@ def load_pipeline() -> Pipeline:
41
  vae=vae,
42
  local_files_only=True,
43
  torch_dtype=torch.bfloat16,
44
- ).to("cuda")
45
 
46
- pipeline.vae.to(memory_format=torch.channels_last)
47
- quantize_(pipeline.vae, int8_weight_only())
48
- pipeline.vae = torch.compile(pipeline.vae, mode="reduce-overhead")
49
- # pipeline.to("cuda")
 
 
50
  for _ in range(2):
51
  pipeline("cat", num_inference_steps=4)
52
 
 
9
  from pipelines.models import TextToImageRequest
10
  from torch import Generator
11
  from torchao.quantization import quantize_, int8_weight_only
12
+ from transformers import T5EncoderModel, CLIPTextModel, logging
13
 
14
 
15
  Pipeline: TypeAlias = FluxPipeline
16
  torch.backends.cudnn.benchmark = True
17
+ torch.backends.cudnn.benchmark = True
18
+ torch._inductor.config.conv_1x1_as_mm = True
19
+ torch._inductor.config.coordinate_descent_tuning = True
20
+ torch._inductor.config.epilogue_fusion = False
21
+ torch._inductor.config.coordinate_descent_check_all_directions = True
22
+ os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
23
 
24
  CHECKPOINT = "jokerbit/flux.1-schnell-Robert-int8wo"
25
  REVISION = "5ef0012f11a863e5111ec56540302a023bc8587b"
 
47
  vae=vae,
48
  local_files_only=True,
49
  torch_dtype=torch.bfloat16,
50
+ )
51
 
52
+ pipeline.transformer.to(memory_format=torch.channels_last)
53
+ pipeline.vae.to(memory_format=torch.channels_last)
54
+ quantize_(pipeline.vae, int8_weight_only())
55
+ pipeline.vae = torch.compile(pipeline.vae, mode="reduce-overhead", fullgraph=True)
56
+ pipeline.to("cuda")
57
+
58
  for _ in range(2):
59
  pipeline("cat", num_inference_steps=4)
60