jokerbit committed on
Commit
c31a6b2
·
verified ·
1 Parent(s): f34d132

Upload src/pipeline.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/pipeline.py +4 -3
src/pipeline.py CHANGED
@@ -10,6 +10,7 @@ from pipelines.models import TextToImageRequest
10
  from torch import Generator
11
  from torchao.quantization import quantize_, int8_weight_only
12
  from transformers import T5EncoderModel, CLIPTextModel
 
13
 
14
 
15
  Pipeline: TypeAlias = FluxPipeline
@@ -50,10 +51,10 @@ def load_pipeline() -> Pipeline:
50
  )
51
 
52
  pipeline.transformer.to(memory_format=torch.channels_last)
53
- pipeline.text_encoder = torch.compile(pipeline.text_encoder, mode="reduce-overhead")
54
- # quantize_(pipeline.vae, int8_weight_only())
55
- # pipeline.vae = torch.compile(pipeline.vae, mode="reduce-overhead")
56
  pipeline.to("cuda")
 
57
  for _ in range(2):
58
  pipeline("cat", num_inference_steps=4)
59
 
 
10
  from torch import Generator
11
  from torchao.quantization import quantize_, int8_weight_only
12
  from transformers import T5EncoderModel, CLIPTextModel
13
+ import torch_tensorrt
14
 
15
 
16
  Pipeline: TypeAlias = FluxPipeline
 
51
  )
52
 
53
  pipeline.transformer.to(memory_format=torch.channels_last)
54
+ # quantize_(pipeline.vae, int8_weight_only())
55
+ pipeline.vae = torch.compile(pipeline.vae, backend="tensorrt")
 
56
  pipeline.to("cuda")
57
+
58
  for _ in range(2):
59
  pipeline("cat", num_inference_steps=4)
60