radames committed on
Commit 1d1e539 · 1 Parent(s): 141db8f

IP Adapter examples

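This commit adds two new server pipelines that combine ostris' IP composition adapter with ByteDance's Hyper-SD 1-step LoRA, one for SD 1.5 and one for SDXL. The core recipe both files share is sketched below as a minimal standalone reduction of the SD 1.5 variant; the model ids, weight names, and calls are taken directly from the files in this diff, only the standalone wiring is an assumption:

```python
# Minimal sketch of the recipe the new pipelines use (SD 1.5 variant).
import torch
from diffusers import DiffusionPipeline, TCDScheduler
from huggingface_hub import hf_hub_download
from transformers import CLIPVisionModelWithProjection

# CLIP image encoder for the IP-Adapter, loaded separately because the
# composition adapter repo ships no image encoder of its own
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=torch.float16
)
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    image_encoder=image_encoder,
    variant="fp16",
).to("cuda")

# IP composition adapter: conditions generation on the layout of a reference image
pipe.load_ip_adapter(
    "ostris/ip-composition-adapter",
    subfolder="",
    weight_name=["ip_plus_composition_sd15.safetensors"],
    image_encoder_folder=None,
)
pipe.set_ip_adapter_scale([0.8])

# Hyper-SD 1-step LoRA fused into the UNet, plus the TCD scheduler,
# enables one/few-step sampling
pipe.load_lora_weights(
    hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-1step-lora.safetensors")
)
pipe.fuse_lora()
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
```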
server/pipelines/IPcompositionHyperSD15.py ADDED
@@ -0,0 +1,211 @@
+ from diffusers import (
+     DiffusionPipeline,
+     TCDScheduler,
+ )
+ from compel import Compel
+ import torch
+ from transformers import CLIPVisionModelWithProjection
+ from huggingface_hub import hf_hub_download
+
+ try:
+     import intel_extension_for_pytorch as ipex  # type: ignore
+ except ImportError:
+     pass
+
+ from config import Args
+ from pydantic import BaseModel, Field
+ from PIL import Image
+
+ model_id = "runwayml/stable-diffusion-v1-5"
+ ip_adapter_model = "ostris/ip-composition-adapter"
+ file_name = "ip_plus_composition_sd15.safetensors"
+
+ default_prompt = "Portrait of The Terminator with glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
+ default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
+ page_content = """
+ <h1 class="text-3xl font-bold">Hyper-SD Unified + IP Adapter Composition</h1>
+ <h3 class="text-xl font-bold">Image-to-Image IP Adapter</h3>
+
+ """
+
+
+ class Pipeline:
+     class Info(BaseModel):
+         name: str = "ip-composition+hyper-sd15"
+         title: str = "Hyper-SD15 + IP Adapter Composition"
+         description: str = "Generates an image from a reference image and a text prompt"
+         input_mode: str = "image"
+         page_content: str = page_content
+
+     class InputParams(BaseModel):
+         prompt: str = Field(
+             default_prompt,
+             title="Prompt",
+             field="textarea",
+             id="prompt",
+         )
+         negative_prompt: str = Field(
+             default_negative_prompt,
+             title="Negative Prompt",
+             field="textarea",
+             id="negative_prompt",
+             hide=True,
+         )
+         seed: int = Field(
+             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
+         )
+         steps: int = Field(
+             2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
+         )
+         width: int = Field(
+             512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
+         )
+         height: int = Field(
+             512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
+         )
+         guidance_scale: float = Field(
+             0.0,
+             min=0,
+             max=10,
+             step=0.001,
+             title="Guidance Scale",
+             field="range",
+             hide=True,
+             id="guidance_scale",
+         )
+         ip_adapter_scale: float = Field(
+             0.8,
+             min=0.0,
+             max=1.0,
+             step=0.001,
+             title="IP Adapter Scale",
+             field="range",
+             hide=True,
+             id="ip_adapter_scale",
+         )
+         eta: float = Field(
+             1.0,
+             min=0,
+             max=1.0,
+             step=0.001,
+             title="Eta",
+             field="range",
+             hide=True,
+             id="eta",
+         )
+
+     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
+         # keep the image encoder in the same dtype as the rest of the pipeline
+         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+             "h94/IP-Adapter",
+             subfolder="models/image_encoder",
+             torch_dtype=torch_dtype,
+         ).to(device)
+
+         if args.safety_checker:
+             self.pipe = DiffusionPipeline.from_pretrained(
+                 model_id,
+                 torch_dtype=torch_dtype,
+                 image_encoder=image_encoder,
+                 variant="fp16",
+             )
+         else:
+             self.pipe = DiffusionPipeline.from_pretrained(
+                 model_id,
+                 safety_checker=None,
+                 torch_dtype=torch_dtype,
+                 image_encoder=image_encoder,
+                 variant="fp16",
+             )
+
+         # composition IP-Adapter: conditions generation on the layout of the input image
+         self.pipe.load_ip_adapter(
+             ip_adapter_model,
+             subfolder="",
+             weight_name=[file_name],
+             image_encoder_folder=None,
+         )
+
+         # Hyper-SD 1-step LoRA enables one/few-step sampling with the TCD scheduler
+         self.pipe.load_lora_weights(
+             hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-1step-lora.safetensors")
+         )
+         self.pipe.fuse_lora()
+
+         self.pipe.scheduler = TCDScheduler.from_config(self.pipe.scheduler.config)
+         self.pipe.set_ip_adapter_scale([0.8])
+
+         # if args.compile:
+         #     pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
+         #     pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)
+
+         if args.sfast:
+             from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                 compile,
+                 CompilationConfig,
+             )
+
+             config = CompilationConfig.Default()
+             # config.enable_xformers = True
+             config.enable_triton = True
+             config.enable_cuda_graph = True
+             self.pipe = compile(self.pipe, config=config)
+
+         self.pipe.set_progress_bar_config(disable=True)
+         self.pipe.to(device=device)
+         if device.type != "mps":
+             self.pipe.unet.to(memory_format=torch.channels_last)
+
+         if args.compel:
+             self.compel_proc = Compel(
+                 tokenizer=self.pipe.tokenizer,
+                 text_encoder=self.pipe.text_encoder,
+                 truncate_long_prompts=False,
+             )
+
+         if args.torch_compile:
+             self.pipe.unet = torch.compile(
+                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
+             )
+             self.pipe.vae = torch.compile(
+                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
+             )
+             # warmup pass so compilation happens before the first request;
+             # the IP-Adapter expects ip_adapter_image, not image
+             self.pipe(
+                 prompt="warmup",
+                 ip_adapter_image=[Image.new("RGB", (768, 768))],
+             )
+
+     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
+         generator = torch.manual_seed(params.seed)
+         self.pipe.set_ip_adapter_scale([params.ip_adapter_scale])
+
+         prompt_embeds = None
+         prompt = params.prompt
+         if hasattr(self, "compel_proc"):
+             prompt_embeds = self.compel_proc(prompt)
+             prompt = None
+
+         steps = params.steps
+
+         results = self.pipe(
+             prompt=prompt,
+             prompt_embeds=prompt_embeds,
+             generator=generator,
+             num_inference_steps=steps,
+             guidance_scale=params.guidance_scale,
+             width=params.width,
+             height=params.height,
+             eta=params.eta,
+             ip_adapter_image=[params.image],
+             output_type="pil",
+         )
+
+         nsfw_content_detected = (
+             results.nsfw_content_detected[0]
+             if "nsfw_content_detected" in results
+             else False
+         )
+         if nsfw_content_detected:
+             return None
+         result_image = results.images[0]
+
+         return result_image
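For reference, a hypothetical one-step call against a pipeline assembled as in the sketch after the commit message; the argument values mirror the defaults in `InputParams` above, and `reference.png` is a placeholder for the layout image the server normally supplies:

```python
from PIL import Image

ref = Image.open("reference.png").convert("RGB")  # placeholder layout reference
image = pipe(
    prompt=default_prompt,
    ip_adapter_image=[ref],
    num_inference_steps=2,     # default steps in InputParams
    guidance_scale=0.0,        # the fused Hyper-SD LoRA runs without CFG
    eta=1.0,                   # matches the file's default eta for TCD sampling
    width=512,
    height=512,
).images[0]
image.save("output.png")
```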
server/pipelines/IPcompositionHyperSDXL.py ADDED
@@ -0,0 +1,227 @@
+ from diffusers import (
+     StableDiffusionXLPipeline,
+     AutoencoderKL,
+     TCDScheduler,
+ )
+ from compel import Compel, ReturnedEmbeddingsType
+ import torch
+ from transformers import CLIPVisionModelWithProjection
+ from huggingface_hub import hf_hub_download
+
+ try:
+     import intel_extension_for_pytorch as ipex  # type: ignore
+ except ImportError:
+     pass
+
+ from config import Args
+ from pydantic import BaseModel, Field
+ from PIL import Image
+
+ model_id = "stabilityai/stable-diffusion-xl-base-1.0"
+ taesd_model = "madebyollin/taesdxl"
+ ip_adapter_model = "ostris/ip-composition-adapter"
+ file_name = "ip_plus_composition_sdxl.safetensors"
+
+ default_prompt = "Portrait of The Terminator with glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
+ default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
+ page_content = """
+ <h1 class="text-3xl font-bold">Hyper-SDXL Unified + IP Adapter Composition</h1>
+ <h3 class="text-xl font-bold">Image-to-Image IP Adapter</h3>
+
+ """
+
+
+ class Pipeline:
+     class Info(BaseModel):
+         name: str = "ip-composition+hyper-sdxl"
+         title: str = "Hyper-SDXL + IP Adapter Composition"
+         description: str = "Generates an image from a reference image and a text prompt"
+         input_mode: str = "image"
+         page_content: str = page_content
+
+     class InputParams(BaseModel):
+         prompt: str = Field(
+             default_prompt,
+             title="Prompt",
+             field="textarea",
+             id="prompt",
+         )
+         negative_prompt: str = Field(
+             default_negative_prompt,
+             title="Negative Prompt",
+             field="textarea",
+             id="negative_prompt",
+             hide=True,
+         )
+         seed: int = Field(
+             2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
+         )
+         steps: int = Field(
+             2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
+         )
+         width: int = Field(
+             1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
+         )
+         height: int = Field(
+             1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
+         )
+         guidance_scale: float = Field(
+             0.0,
+             min=0,
+             max=10,
+             step=0.001,
+             title="Guidance Scale",
+             field="range",
+             hide=True,
+             id="guidance_scale",
+         )
+         ip_adapter_scale: float = Field(
+             0.8,
+             min=0.0,
+             max=1.0,
+             step=0.001,
+             title="IP Adapter Scale",
+             field="range",
+             hide=True,
+             id="ip_adapter_scale",
+         )
+         eta: float = Field(
+             1.0,
+             min=0,
+             max=1.0,
+             step=0.001,
+             title="Eta",
+             field="range",
+             hide=True,
+             id="eta",
+         )
+
+     def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
+         vae = AutoencoderKL.from_pretrained(
+             "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
+         )
+         # keep the image encoder in the same dtype as the rest of the pipeline
+         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+             "h94/IP-Adapter",
+             subfolder="models/image_encoder",
+             torch_dtype=torch_dtype,
+         ).to(device)
+
+         if args.safety_checker:
+             self.pipe = StableDiffusionXLPipeline.from_pretrained(
+                 model_id,
+                 # vae=vae,
+                 torch_dtype=torch_dtype,
+                 image_encoder=image_encoder,
+                 variant="fp16",
+             )
+         else:
+             self.pipe = StableDiffusionXLPipeline.from_pretrained(
+                 model_id,
+                 safety_checker=None,
+                 torch_dtype=torch_dtype,
+                 vae=vae,
+                 image_encoder=image_encoder,
+                 variant="fp16",
+             )
+
+         # composition IP-Adapter: conditions generation on the layout of the input image
+         self.pipe.load_ip_adapter(
+             ip_adapter_model,
+             subfolder="",
+             weight_name=[file_name],
+             image_encoder_folder=None,
+         )
+
+         # Hyper-SDXL 1-step LoRA enables one/few-step sampling with the TCD scheduler
+         self.pipe.load_lora_weights(
+             hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-1step-lora.safetensors")
+         )
+         self.pipe.fuse_lora()
+
+         self.pipe.scheduler = TCDScheduler.from_config(self.pipe.scheduler.config)
+         self.pipe.set_ip_adapter_scale([0.8])
+
+         if args.sfast:
+             from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                 compile,
+                 CompilationConfig,
+             )
+
+             config = CompilationConfig.Default()
+             # config.enable_xformers = True
+             config.enable_triton = True
+             config.enable_cuda_graph = True
+             self.pipe = compile(self.pipe, config=config)
+
+         self.pipe.set_progress_bar_config(disable=True)
+         self.pipe.to(device=device)
+         if device.type != "mps":
+             self.pipe.unet.to(memory_format=torch.channels_last)
+
+         if args.compel:
+             self.pipe.compel_proc = Compel(
+                 tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
+                 text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
+                 returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+                 requires_pooled=[False, True],
+             )
+
+         if args.torch_compile:
+             self.pipe.unet = torch.compile(
+                 self.pipe.unet, mode="reduce-overhead", fullgraph=True
+             )
+             self.pipe.vae = torch.compile(
+                 self.pipe.vae, mode="reduce-overhead", fullgraph=True
+             )
+             # warmup pass so compilation happens before the first request;
+             # the IP-Adapter expects ip_adapter_image, not image
+             self.pipe(
+                 prompt="warmup",
+                 ip_adapter_image=[Image.new("RGB", (768, 768))],
+             )
+
+     def predict(self, params: "Pipeline.InputParams") -> Image.Image:
+         generator = torch.manual_seed(params.seed)
+         self.pipe.set_ip_adapter_scale([params.ip_adapter_scale])
+
+         prompt = params.prompt
+         negative_prompt = params.negative_prompt
+         prompt_embeds = None
+         pooled_prompt_embeds = None
+         negative_prompt_embeds = None
+         negative_pooled_prompt_embeds = None
+         if hasattr(self.pipe, "compel_proc"):
+             _prompt_embeds, _pooled_prompt_embeds = self.pipe.compel_proc(
+                 [params.prompt, params.negative_prompt]
+             )
+             prompt = None
+             negative_prompt = None
+             prompt_embeds = _prompt_embeds[0:1]
+             negative_prompt_embeds = _prompt_embeds[1:2]
+             # slice both rows from the unsliced pooled tensor; slicing after
+             # reassigning pooled_prompt_embeds would leave the negative row empty
+             pooled_prompt_embeds = _pooled_prompt_embeds[0:1]
+             negative_pooled_prompt_embeds = _pooled_prompt_embeds[1:2]
+
+         steps = params.steps
+
+         results = self.pipe(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             prompt_embeds=prompt_embeds,
+             pooled_prompt_embeds=pooled_prompt_embeds,
+             negative_prompt_embeds=negative_prompt_embeds,
+             negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+             generator=generator,
+             num_inference_steps=steps,
+             guidance_scale=params.guidance_scale,
+             width=params.width,
+             height=params.height,
+             eta=params.eta,
+             ip_adapter_image=[params.image],
+             output_type="pil",
+         )
+
+         nsfw_content_detected = (
+             results.nsfw_content_detected[0]
+             if "nsfw_content_detected" in results
+             else False
+         )
+         if nsfw_content_detected:
+             return None
+         result_image = results.images[0]
+
+         return result_image
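One subtlety in `predict` above: Compel encodes the positive and negative prompts in a single batched call, so both returned tensors must be split by row before they are handed to the pipeline. A minimal sketch of that pattern, assuming `pipe` is any SDXL pipeline (the constructor arguments are the same ones this file uses):

```python
from compel import Compel, ReturnedEmbeddingsType

compel_proc = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],  # only the second SDXL encoder provides pooled output
)

# one batched call returns stacked [positive, negative] tensors
embeds, pooled = compel_proc(["a photo of a cat", "blurry, low quality"])
prompt_embeds, negative_prompt_embeds = embeds[0:1], embeds[1:2]
pooled_prompt_embeds, negative_pooled_prompt_embeds = pooled[0:1], pooled[1:2]
```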
server/pipelines/controlnetHyperSDXL.py CHANGED
@@ -20,7 +20,8 @@ from pydantic import BaseModel, Field
  from PIL import Image
  import math
 
- controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
+ # controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
+ controlnet_model = "xinsir/controlnet-canny-sdxl-1.0"
  model_id = "stabilityai/stable-diffusion-xl-base-1.0"
  taesd_model = "madebyollin/taesdxl"
 
@@ -192,7 +193,7 @@
  )
 
  config = CompilationConfig.Default()
- config.enable_xformers = True
+ # config.enable_xformers = True
  config.enable_triton = True
  config.enable_cuda_graph = True
  self.pipe = compile(self.pipe, config=config)
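The functional change here swaps the canny ControlNet checkpoint from diffusers' reference model to xinsir's (and disables xformers in the stable-fast config). A hedged loading sketch using the standard diffusers API; the rest of this file's pipeline wiring is unchanged by the commit:

```python
import torch
from diffusers import ControlNetModel

# the checkpoint this diff switches to
controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
)
```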
server/requirements.txt CHANGED
@@ -1,7 +1,7 @@
- diffusers==0.28.0
+ diffusers==0.28.2
  transformers==4.41.1
  --extra-index-url https://download.pytorch.org/whl/cu121;
- torch==2.2.0
+ torch==2.2.2
  fastapi==0.111.0
  uvicorn[standard]==0.30.0
  Pillow==10.3.0
@@ -12,8 +12,8 @@ peft==0.11.1
  xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
  markdown2
  safetensors
- stable_fast @ https://github.com/chengzeyi/stable-fast/releases/download/v1.0.4/stable_fast-1.0.4+torch220cu121-cp310-cp310-manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64'
+ stable_fast @ https://github.com/chengzeyi/stable-fast/releases/download/v1.0.5/stable_fast-1.0.5+torch222cu121-cp310-cp310-manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64'
- oneflow @ https://github.com/siliconflow/oneflow_releases/releases/download/community_cu121/oneflow-0.9.1.dev20240316+cu121-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64'
+ oneflow @ https://github.com/siliconflow/oneflow_releases/releases/download/community_cu121/oneflow-0.9.1.dev20240515+cu121-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ; sys_platform != 'darwin' or platform_machine != 'arm64'
  onediff @ git+https://github.com/siliconflow/onediff.git@main#egg=onediff ; sys_platform != 'darwin' or platform_machine != 'arm64'
  setuptools
  mpmath==1.3.0
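A quick way to confirm an existing environment picked up the new pins (an illustrative check, not part of the repo):

```python
import diffusers
import torch

# versions pinned in server/requirements.txt after this change
assert diffusers.__version__ == "0.28.2", diffusers.__version__
assert torch.__version__.startswith("2.2.2"), torch.__version__
```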