jokerbit committed on
Commit
e4f08b5
·
verified ·
1 Parent(s): c5a412c

Upload src/pipeline.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/pipeline.py +15 -7
src/pipeline.py CHANGED
@@ -4,21 +4,23 @@ from typing import TypeAlias
4
 
5
  import torch
6
  from PIL.Image import Image
7
- from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, AutoencoderTiny
8
  from huggingface_hub.constants import HF_HUB_CACHE
9
  from pipelines.models import TextToImageRequest
10
  from torch import Generator
11
  from torchao.quantization import quantize_, int8_weight_only
12
- from transformers import T5EncoderModel, CLIPTextModel
 
13
 
14
- os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
15
  Pipeline: TypeAlias = FluxPipeline
 
 
16
  torch._inductor.config.conv_1x1_as_mm = True
17
  torch._inductor.config.coordinate_descent_tuning = True
18
  torch._inductor.config.epilogue_fusion = False
19
  torch._inductor.config.coordinate_descent_check_all_directions = True
20
- torch.backends.cudnn.benchmark = True
21
-
22
  CHECKPOINT = "jokerbit/flux.1-schnell-Robert-int8wo"
23
  REVISION = "5ef0012f11a863e5111ec56540302a023bc8587b"
24
 
@@ -33,7 +35,7 @@ def load_pipeline() -> Pipeline:
33
  use_safetensors=False,
34
  local_files_only=True,
35
  torch_dtype=torch.bfloat16)
36
-
37
  pipeline = FluxPipeline.from_pretrained(
38
  CHECKPOINT,
39
  revision=REVISION,
@@ -41,9 +43,14 @@ def load_pipeline() -> Pipeline:
41
  local_files_only=True,
42
  torch_dtype=torch.bfloat16,
43
  )
 
44
  pipeline.transformer.to(memory_format=torch.channels_last)
 
 
 
45
  pipeline.to("cuda")
46
- for _ in range(4):
 
47
  pipeline("cat", num_inference_steps=4)
48
 
49
  return pipeline
@@ -78,3 +85,4 @@ if __name__ == "__main__":
78
  infer(request, pipe_)
79
  stop_time = perf_counter()
80
  print(f"Request in {stop_time - start_time}s")
 
 
4
 
5
  import torch
6
  from PIL.Image import Image
7
+ from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, AutoencoderTiny, DiffusionPipeline
8
  from huggingface_hub.constants import HF_HUB_CACHE
9
  from pipelines.models import TextToImageRequest
10
  from torch import Generator
11
  from torchao.quantization import quantize_, int8_weight_only
12
+ from transformers import T5EncoderModel, CLIPTextModel, logging
13
+
14
 
 
15
  Pipeline: TypeAlias = FluxPipeline
16
+ torch.backends.cudnn.benchmark = True
17
+ torch.backends.cudnn.benchmark = True
18
  torch._inductor.config.conv_1x1_as_mm = True
19
  torch._inductor.config.coordinate_descent_tuning = True
20
  torch._inductor.config.epilogue_fusion = False
21
  torch._inductor.config.coordinate_descent_check_all_directions = True
22
+ os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
23
+ os.environ["TOKENIZERS_PARALLELISM"] = "True"
24
  CHECKPOINT = "jokerbit/flux.1-schnell-Robert-int8wo"
25
  REVISION = "5ef0012f11a863e5111ec56540302a023bc8587b"
26
 
 
35
  use_safetensors=False,
36
  local_files_only=True,
37
  torch_dtype=torch.bfloat16)
38
+
39
  pipeline = FluxPipeline.from_pretrained(
40
  CHECKPOINT,
41
  revision=REVISION,
 
43
  local_files_only=True,
44
  torch_dtype=torch.bfloat16,
45
  )
46
+
47
  pipeline.transformer.to(memory_format=torch.channels_last)
48
+ pipeline.vae.to(memory_format=torch.channels_last)
49
+ # quantize_(pipeline.vae, int8_weight_only())
50
+ pipeline.vae = torch.compile(pipeline.vae, fullgraph=True, mode="max-autotune")
51
  pipeline.to("cuda")
52
+
53
+ for _ in range(2):
54
  pipeline("cat", num_inference_steps=4)
55
 
56
  return pipeline
 
85
  infer(request, pipe_)
86
  stop_time = perf_counter()
87
  print(f"Request in {stop_time - start_time}s")
88
+