Bethie committed on
Commit ffb5779 · verified · Parent: 2fbc752

Triton upload

Triton/bls_sdxl_cnext_ip_anime/1/__pycache__/model.cpython-310.pyc ADDED
Binary file (31.8 kB).
 
Triton/bls_sdxl_cnext_ip_anime/1/configs/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "_class_name": "EulerDiscreteScheduler",
+   "_diffusers_version": "0.30.3",
+   "beta_end": 0.012,
+   "beta_schedule": "scaled_linear",
+   "beta_start": 0.00085,
+   "clip_sample": false,
+   "final_sigmas_type": "zero",
+   "interpolation_type": "linear",
+   "num_train_timesteps": 1000,
+   "prediction_type": "epsilon",
+   "rescale_betas_zero_snr": false,
+   "sample_max_value": 1.0,
+   "set_alpha_to_one": false,
+   "sigma_max": null,
+   "sigma_min": null,
+   "skip_prk_steps": true,
+   "steps_offset": 1,
+   "timestep_spacing": "leading",
+   "timestep_type": "discrete",
+   "trained_betas": null,
+   "use_karras_sigmas": false
+ }
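
For reference, this scheduler config can be instantiated directly with diffusers; a minimal sketch (the path is illustrative, assuming a local copy of the file shown above):

import json

from diffusers import EulerDiscreteScheduler

# Illustrative path; model.py below points at the same configs/scheduler/ directory.
with open("configs/scheduler/scheduler_config.json") as f:
    scheduler_config = json.load(f)

# "_class_name" is EulerDiscreteScheduler, so that class matches this file.
scheduler = EulerDiscreteScheduler.from_config(scheduler_config)
print(scheduler.config.num_train_timesteps)  # 1000

Note that model.py in this commit builds its scheduler with DDPMScheduler.from_config on this directory, even though the config's _class_name is EulerDiscreteScheduler.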
Triton/bls_sdxl_cnext_ip_anime/1/configs/text_encoder_2_config/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "/root/.cache/huggingface/hub/models--neta-art--neta-xl-2.0/snapshots/3b5b71e53e392a96238e72f95d88ec17990e8ca6/text_encoder_2",
+   "architectures": [
+     "CLIPTextModelWithProjection"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_size": 1280,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 5120,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 20,
+   "num_hidden_layers": 32,
+   "output_hidden_states": true,
+   "pad_token_id": 1,
+   "projection_dim": 1280,
+   "torch_dtype": "float16",
+   "transformers_version": "4.45.2",
+   "vocab_size": 49408
+ }
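
As a quick sanity check, the same file also loads with transformers (a sketch with an illustrative path; model.py below instead reads it as raw JSON through its read_json helper):

from transformers import CLIPTextConfig

# Illustrative path; any local copy of the config shown above works.
cfg = CLIPTextConfig.from_json_file("configs/text_encoder_2_config/config.json")
print(cfg.projection_dim, cfg.hidden_size, cfg.num_hidden_layers)  # 1280 1280 32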
Triton/bls_sdxl_cnext_ip_anime/1/configs/unet_config/config.json ADDED
@@ -0,0 +1,73 @@
+ {
+   "_class_name": "UNet2DConditionModel",
+   "_diffusers_version": "0.30.3",
+   "_name_or_path": "/root/.cache/huggingface/hub/models--neta-art--neta-xl-2.0/snapshots/3b5b71e53e392a96238e72f95d88ec17990e8ca6/unet",
+   "act_fn": "silu",
+   "addition_embed_type": "text_time",
+   "addition_embed_type_num_heads": 64,
+   "addition_time_embed_dim": 256,
+   "attention_head_dim": [
+     5,
+     10,
+     20
+   ],
+   "attention_type": "default",
+   "block_out_channels": [
+     320,
+     640,
+     1280
+   ],
+   "center_input_sample": false,
+   "class_embed_type": null,
+   "class_embeddings_concat": false,
+   "conv_in_kernel": 3,
+   "conv_out_kernel": 3,
+   "cross_attention_dim": 2048,
+   "cross_attention_norm": null,
+   "down_block_types": [
+     "DownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "dropout": 0.0,
+   "dual_cross_attention": false,
+   "encoder_hid_dim": null,
+   "encoder_hid_dim_type": null,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_only_cross_attention": null,
+   "mid_block_scale_factor": 1,
+   "mid_block_type": "UNetMidBlock2DCrossAttn",
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_attention_heads": null,
+   "num_class_embeds": null,
+   "only_cross_attention": false,
+   "out_channels": 4,
+   "projection_class_embeddings_input_dim": 2816,
+   "resnet_out_scale_factor": 1.0,
+   "resnet_skip_time_act": false,
+   "resnet_time_scale_shift": "default",
+   "reverse_transformer_layers_per_block": null,
+   "sample_size": 128,
+   "time_cond_proj_dim": null,
+   "time_embedding_act_fn": null,
+   "time_embedding_dim": null,
+   "time_embedding_type": "positional",
+   "timestep_post_act": null,
+   "transformer_layers_per_block": [
+     1,
+     2,
+     10
+   ],
+   "up_block_types": [
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D",
+     "UpBlock2D"
+   ],
+   "upcast_attention": null,
+   "use_linear_projection": true
+ }
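
The numbers model.py's _get_add_time_ids validates come from this file: SDXL packs six added time ids (original size, crop offsets, target size), embeds each with addition_time_embed_dim channels, and concatenates the text-encoder-2 projection. A quick check that this config is self-consistent:

# Dimensions taken from this UNet config and the text_encoder_2 config above.
addition_time_embed_dim = 256
text_encoder_projection_dim = 1280
num_time_ids = 6  # original_size (2) + crops_coords_top_left (2) + target_size (2)

passed_add_embed_dim = addition_time_embed_dim * num_time_ids + text_encoder_projection_dim
assert passed_add_embed_dim == 2816  # projection_class_embeddings_input_dim in this config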
Triton/bls_sdxl_cnext_ip_anime/1/configs/vae_decode_config/config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.30.3",
+   "_name_or_path": "/root/.cache/huggingface/hub/models--neta-art--neta-xl-2.0/snapshots/3b5b71e53e392a96238e72f95d88ec17990e8ca6/vae",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "force_upcast": true,
+   "in_channels": 3,
+   "latent_channels": 4,
+   "latents_mean": null,
+   "latents_std": null,
+   "layers_per_block": 2,
+   "mid_block_add_attention": true,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 1024,
+   "scaling_factor": 0.13025,
+   "shift_factor": null,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ],
+   "use_post_quant_conv": true,
+   "use_quant_conv": true
+ }
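
model.py derives the latent down-scaling factor from this config in initialize(); the arithmetic, spelled out:

# As computed in model.py: vae_scale_factor = 2 ** (len(block_out_channels) - 1)
block_out_channels = [128, 256, 512, 512]  # from this config
vae_scale_factor = 2 ** (len(block_out_channels) - 1)
assert vae_scale_factor == 8
print(1024 // vae_scale_factor)  # 128, matching the UNet's sample_size above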
Triton/bls_sdxl_cnext_ip_anime/1/model.py ADDED
@@ -0,0 +1,1410 @@
1
+ import inspect
2
+ import logging
3
+ import json
4
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
5
+ from pathlib import Path
6
+ from PIL import Image
7
+
8
+ import numpy as np
9
+ import torch
10
+ from tqdm import tqdm
11
+ from transformers import (
12
+ CLIPImageProcessor,
13
+ CLIPTextModel,
14
+ CLIPTextModelWithProjection,
15
+ CLIPTokenizer,
16
+ CLIPVisionModelWithProjection,
17
+ CLIPFeatureExtractor,
18
+ )
19
+
20
+ from diffusers import DDPMScheduler
21
+ from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback
22
+ from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
23
+ from diffusers.loaders import (
24
+ FromSingleFileMixin,
25
+ IPAdapterMixin,
26
+ StableDiffusionXLLoraLoaderMixin,
27
+ TextualInversionLoaderMixin,
28
+ )
29
+ from diffusers.models import (
30
+ AutoencoderKL,
31
+ ImageProjection,
32
+ UNet2DConditionModel,
33
+ )
34
+ from diffusers.models.attention_processor import (
35
+ AttnProcessor2_0,
36
+ FusedAttnProcessor2_0,
37
+ LoRAAttnProcessor2_0,
38
+ LoRAXFormersAttnProcessor,
39
+ XFormersAttnProcessor,
40
+ )
41
+ from diffusers.models.lora import adjust_lora_scale_text_encoder
42
+ from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
43
+ from diffusers.pipelines.onnx_utils import ORT_TO_NP_TYPE
44
+ from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
45
+ from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
46
+ from diffusers.schedulers import (
47
+ DDIMScheduler,
48
+ DPMSolverMultistepScheduler,
49
+ DEISMultistepScheduler,
50
+ EulerAncestralDiscreteScheduler,
51
+ EulerDiscreteScheduler,
52
+ LMSDiscreteScheduler,
53
+ PNDMScheduler,
54
+ KarrasDiffusionSchedulers,
55
+ )
56
+ from diffusers.utils import (
57
+ USE_PEFT_BACKEND,
58
+ deprecate,
59
+ is_invisible_watermark_available,
60
+ is_torch_xla_available,
61
+ logging as diffusers_logging,
62
+ replace_example_docstring,
63
+ scale_lora_layers,
64
+ unscale_lora_layers,
65
+ PIL_INTERPOLATION,
66
+
67
+ )
68
+ from huggingface_hub.utils import validate_hf_hub_args
69
+
70
+ # Triton Python backend utils
71
+ try:
72
+ import triton_python_backend_utils as pb_utils
73
+ except ImportError:
74
+ pass # triton_python_backend_utils exists only inside Triton Python backend.
75
+
76
+ # Conditionally import watermark if available
77
+ if is_invisible_watermark_available():
78
+ from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
79
+
80
+ from configs import *
81
+
82
+
83
+
84
+ class TritonPythonModel:
85
+ tokenizer: CLIPTokenizer
86
+ tokenizer_2: CLIPTokenizer
87
+ image_encoder: CLIPVisionModelWithProjection
88
+ feature_extractor: CLIPImageProcessor
89
+ scheduler: Union[
90
+ DDIMScheduler,
91
+ PNDMScheduler,
92
+ LMSDiscreteScheduler,
93
+ EulerDiscreteScheduler,
94
+ EulerAncestralDiscreteScheduler,
95
+ DPMSolverMultistepScheduler,
96
+ ]
97
+ prompt: Union[str, List[str]]
98
+ prompt_2: Optional[Union[str, List[str]]]
99
+ controlnet_image: Optional[PipelineImageInput]
100
+ cnext_model_name: str
101
+ height: Optional[int]
102
+ width: Optional[int]
103
+ num_inference_steps: int
104
+ timesteps: List[int]
105
+ sigmas: List[float]
106
+ denoising_end: Optional[float]
107
+ guidance_scale: float
108
+ negative_prompt: Optional[Union[str, List[str]]]
109
+ negative_prompt_2: Optional[Union[str, List[str]]]
110
+ num_images_per_prompt: Optional[int]
111
+ eta: float
112
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]]
113
+ latents: Optional[torch.Tensor]
114
+ prompt_embeds: Optional[torch.Tensor]
115
+ negative_prompt_embeds: Optional[torch.Tensor]
116
+ pooled_prompt_embeds: Optional[torch.Tensor]
117
+ negative_pooled_prompt_embeds: Optional[torch.Tensor]
118
+ ip_adapter_image: Optional[PipelineImageInput]
119
+ ip_adapter_image_embeds: Optional[List[torch.Tensor]]
120
+ output_type: Optional[str]
121
+ return_dict: bool
122
+ cross_attention_kwargs: Optional[Dict[str, Any]]
123
+ guidance_rescale: float
124
+ original_size: Optional[Tuple[int, int]]
125
+ crops_coords_top_left: Tuple[int, int]
126
+ target_size: Optional[Tuple[int, int]]
127
+ negative_original_size: Optional[Tuple[int, int]]
128
+ negative_crops_coords_top_left: Tuple[int, int]
129
+ negative_target_size: Optional[Tuple[int, int]]
130
+ clip_skip: Optional[int]
131
+ callback_on_step_end: Optional[Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]]
132
+ control_scale: float
133
+ callback_on_step_end_tensor_inputs: List[str]
134
+ add_watermarker: Optional[bool]
135
+ device: str
136
+
137
+ _callback_tensor_inputs = [
138
+ "latents",
139
+ "prompt_embeds",
140
+ "negative_prompt_embeds",
141
+ "add_text_embeds",
142
+ "add_time_ids",
143
+ "negative_pooled_prompt_embeds",
144
+ "negative_add_time_ids",
145
+ ]
146
+
147
+ @property
148
+ def guidance_scale(self) -> Optional[float]:
149
+ return self._guidance_scale
150
+
151
+ @guidance_scale.setter
152
+ def guidance_scale(self, value: Optional[float]) -> None:
153
+ self._guidance_scale = value
154
+
155
+ # Getter and setter for guidance_rescale
156
+ @property
157
+ def guidance_rescale(self) -> Optional[float]:
158
+ return self._guidance_rescale
159
+
160
+ @guidance_rescale.setter
161
+ def guidance_rescale(self, value: Optional[float]) -> None:
162
+ self._guidance_rescale = value
163
+
164
+ # Getter and setter for clip_skip
165
+ @property
166
+ def clip_skip(self) -> Optional[int]:
167
+ return self._clip_skip
168
+
169
+ @clip_skip.setter
170
+ def clip_skip(self, value: Optional[int]) -> None:
171
+ self._clip_skip = value
172
+
173
+ # Getter for do_classifier_free_guidance (no setter since it's computed)
174
+ @property
175
+ def do_classifier_free_guidance(self) -> bool:
176
+ return self._guidance_scale > 1 and self.unet_configs['time_cond_proj_dim'] is None
177
+
178
+ # Getter and setter for cross_attention_kwargs
179
+ @property
180
+ def cross_attention_kwargs(self) -> Optional[dict]:
181
+ return self._cross_attention_kwargs
182
+
183
+ @cross_attention_kwargs.setter
184
+ def cross_attention_kwargs(self, value: Optional[dict]) -> None:
185
+ self._cross_attention_kwargs = value
186
+
187
+ # Getter and setter for denoising_end
188
+ @property
189
+ def denoising_end(self) -> Optional[float]:
190
+ return self._denoising_end
191
+
192
+ @denoising_end.setter
193
+ def denoising_end(self, value: Optional[float]) -> None:
194
+ self._denoising_end = value
195
+
196
+ # Getter and setter for num_timesteps
197
+ @property
198
+ def num_timesteps(self) -> Optional[int]:
199
+ return self._num_timesteps
200
+
201
+ @num_timesteps.setter
202
+ def num_timesteps(self, value: Optional[int]) -> None:
203
+ self._num_timesteps = value
204
+
205
+ # Getter and setter for interrupt
206
+ @property
207
+ def interrupt(self) -> Optional[bool]:
208
+ return self._interrupt
209
+
210
+ @interrupt.setter
211
+ def interrupt(self, value: Optional[bool]) -> None:
212
+ self._interrupt = value
213
+
214
+
215
+ def initialize(self, args: Dict[str, str]) -> None:
216
+
217
+ # current_name: str = str(Path(args["model_repository"]).parent.absolute())
218
+ # self.scheduler_config_path = current_name + "/bls_sdxl_cnext_ip/1/scheduler/"
219
+ # self.scheduler = DEISMultistepScheduler.from_config(self.scheduler_config_path)
220
+
221
+ self.logger = pb_utils.Logger
222
+
223
+ self.prompt = None
224
+ self.prompt_2 = None
225
+ self.negative_prompt = None
226
+ self.negative_prompt_2 = None
227
+
228
+ self.controlnet_image = None
229
+ self.cnext_model_name = None
230
+
231
+ self.height = None
232
+ self.width = None
233
+ self.num_inference_steps = 50
234
+ self.timesteps = None
235
+ self.sigmas = None
236
+ self.denoising_end = None
237
+ self.guidance_scale = 5.0
238
+
239
+ self.num_images_per_prompt = 1
240
+ self.eta = 0.0
241
+ self.generator = None
242
+ self.latents = None
243
+ self.prompt_embeds = None
244
+ self.negative_prompt_embeds = None
245
+ self.pooled_prompt_embeds = None
246
+ self.negative_pooled_prompt_embeds = None
247
+ self.ip_adapter_image = None
248
+ self.ip_adapter_image_embeds = None
249
+ self.output_type = "pil"
250
+ self.return_dict = True
251
+ self.cross_attention_kwargs = None
252
+ self.guidance_rescale = 0.0
253
+ self.original_size = None
254
+ self.crops_coords_top_left = (0, 0)
255
+ self.target_size = None
256
+ self.negative_original_size = None
257
+ self.negative_crops_coords_top_left = (0, 0)
258
+ self.negative_target_size = None
259
+ self.clip_skip = None
260
+ self.callback_on_step_end = None
261
+ self.control_scale = 1.5
262
+ self.dtype = torch.float32
263
+ self.callback_on_step_end_tensor_inputs = ["latents"]
264
+ self.add_watermarker = None
265
+
266
+ current_name: str = str(Path(args["model_repository"]).parent.absolute())
267
+
268
+ self.vae_configs = self.read_json(current_name + "/bls_sdxl_cnext_ip_anime/1/configs/vae_decode_config/config.json")
269
+ self.unet_configs = self.read_json(current_name + "/bls_sdxl_cnext_ip_anime/1/configs/unet_config/config.json")
270
+ self.text_encoder_2_configs = self.read_json(current_name + "/bls_sdxl_cnext_ip_anime/1/configs/text_encoder_2_config/config.json")
271
+ # self.scheduler = DDPMScheduler
272
+ self.scheduler_config_path = current_name + "/bls_sdxl_cnext_ip_anime/1/configs/scheduler/"
273
+ self.scheduler = DDPMScheduler.from_config(self.scheduler_config_path)
274
+
275
+
276
+ self.vae_scale_factor = 2 ** (len(self.vae_configs['block_out_channels']) - 1)
277
+ self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
278
+ self.control_image_processor = VaeImageProcessor(
279
+ vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
280
+ )
281
+
282
+ self.default_sample_size = self.unet_configs["sample_size"]
283
+
284
+ add_watermarker = self.add_watermarker if self.add_watermarker is not None else is_invisible_watermark_available()
285
+
286
+ if add_watermarker:
287
+ self.watermark = StableDiffusionXLWatermarker()
288
+ else:
289
+ self.watermark = None
290
+
291
+ if args.get("model_instance_kind") == "GPU":
292
+ self.device = "cuda"
293
+ else:
294
+ self.device = "cpu"
295
+
296
+ self.image_dtype = torch.float32
297
+
298
+ def prepare_image(
299
+ self,
300
+ image,
301
+ width,
302
+ height,
303
+ batch_size,
304
+ num_images_per_prompt,
305
+ device,
306
+ dtype,
307
+ do_classifier_free_guidance=False,
308
+ guess_mode=False,
309
+ ):
310
+ image = self.control_image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
311
+ image_batch_size = image.shape[0]
312
+
313
+ if image_batch_size == 1:
314
+ repeat_by = batch_size
315
+ else:
316
+ repeat_by = num_images_per_prompt
317
+
318
+ image = image.repeat_interleave(repeat_by, dim=0)
319
+
320
+ image = image.to(device=device, dtype=dtype)
321
+
322
+ if do_classifier_free_guidance and not guess_mode:
323
+ image = torch.cat([image] * 2)
324
+
325
+ return image
326
+
327
+
328
+ def check_inputs(
329
+ self,
330
+ prompt,
331
+ prompt_2,
332
+ height,
333
+ width,
334
+ callback_steps,
335
+ negative_prompt=None,
336
+ negative_prompt_2=None,
337
+ prompt_embeds=None,
338
+ negative_prompt_embeds=None,
339
+ pooled_prompt_embeds=None,
340
+ negative_pooled_prompt_embeds=None,
341
+ ip_adapter_image=None,
342
+ ip_adapter_image_embeds=None,
343
+ ):
344
+ if height % 8 != 0 or width % 8 != 0:
345
+ raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
346
+
347
+ if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
348
+ raise ValueError(
349
+ f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
350
+ f" {type(callback_steps)}."
351
+ )
352
+
353
+ if prompt is not None and prompt_embeds is not None:
354
+ raise ValueError(
355
+ f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
356
+ " only forward one of the two."
357
+ )
358
+ elif prompt_2 is not None and prompt_embeds is not None:
359
+ raise ValueError(
360
+ f"Cannot forward both `prompt_2`: {prompt_2} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
361
+ " only forward one of the two."
362
+ )
363
+ elif prompt is None and prompt_embeds is None:
364
+ raise ValueError(
365
+ "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
366
+ )
367
+ elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
368
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
369
+ elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
370
+ raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
371
+
372
+ if negative_prompt is not None and negative_prompt_embeds is not None:
373
+ raise ValueError(
374
+ f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
375
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
376
+ )
377
+ elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
378
+ raise ValueError(
379
+ f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
380
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
381
+ )
382
+
383
+ if prompt_embeds is not None and negative_prompt_embeds is not None:
384
+ if prompt_embeds.shape != negative_prompt_embeds.shape:
385
+ raise ValueError(
386
+ "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
387
+ f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
388
+ f" {negative_prompt_embeds.shape}."
389
+ )
390
+
391
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
392
+ raise ValueError(
393
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
394
+ )
395
+
396
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
397
+ raise ValueError(
398
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
399
+ )
400
+
401
+ if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
402
+ raise ValueError(
403
+ "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
404
+ )
405
+
406
+ if ip_adapter_image_embeds is not None:
407
+ if not isinstance(ip_adapter_image_embeds, list):
408
+ raise ValueError(
409
+ f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
410
+ )
411
+ elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
412
+ raise ValueError(
413
+ f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
414
+ )
415
+
416
+
417
+ #Call CLIPTokenizer model.py
418
+ def tokenizer_infer(self, prompt):
419
+ prompt = np.array(prompt, dtype = np.object_)
420
+ text_input = pb_utils.Tensor("TEXT", prompt)
421
+ inference_request = pb_utils.InferenceRequest(
422
+ model_name="tokenizer",
423
+ requested_output_names=["input_ids"],
424
+ inputs=[text_input],
425
+ )
426
+ inference_response = inference_request.exec()
427
+ if inference_response.has_error():
428
+ raise pb_utils.TritonModelException(
429
+ inference_response.error().message()
430
+ )
431
+ else:
432
+ text_input_ids = pb_utils.get_output_tensor_by_name(
433
+ inference_response, "input_ids"
434
+ )
435
+ text_input_ids: torch.Tensor = torch.from_dlpack(text_input_ids.to_dlpack())
436
+ return text_input_ids
437
+
438
+ #Call CLIPTokenizer 2 model.py
439
+ def tokenizer_2_infer(self, prompt):
440
+ prompt = np.array(prompt, dtype = np.object_)
441
+ text_input = pb_utils.Tensor("TEXT", prompt)
442
+ inference_request = pb_utils.InferenceRequest(
443
+ model_name="tokenizer_2",
444
+ requested_output_names=["input_ids"],
445
+ inputs=[text_input],
446
+ )
447
+ inference_response = inference_request.exec()
448
+ if inference_response.has_error():
449
+ raise pb_utils.TritonModelException(
450
+ inference_response.error().message()
451
+ )
452
+ else:
453
+ text_input_ids = pb_utils.get_output_tensor_by_name(
454
+ inference_response, "input_ids"
455
+ )
456
+ text_input_ids: torch.Tensor = torch.from_dlpack(text_input_ids.to_dlpack())
457
+ return text_input_ids
458
+
459
+ #Call text encode model
460
+ def text_encoder_infer(self, text_input_ids):
461
+ text_input_encoder = pb_utils.Tensor("input_ids", np.array(text_input_ids))
462
+
463
+ inference_request = pb_utils.InferenceRequest(
464
+ model_name="text_encoder",
465
+ requested_output_names=["last_hidden_state"],
466
+ inputs=[text_input_encoder],
467
+ )
468
+ inference_response = inference_request.exec()
469
+ if inference_response.has_error():
470
+ raise pb_utils.TritonModelException(
471
+ inference_response.error().message()
472
+ )
473
+
474
+ output = pb_utils.get_output_tensor_by_name(
475
+ inference_response, "last_hidden_state"
476
+ )
477
+ return output
478
+
479
+
480
+ #Call text encode 2 model
481
+ def text_encoder_2_infer(self, text_input_ids):
482
+ text_input_encoder = pb_utils.Tensor("input_ids", np.array(text_input_ids))
483
+
484
+ inference_request = pb_utils.InferenceRequest(
485
+ model_name="text_encoder_2",
486
+ requested_output_names=["last_hidden_state"],
487
+ inputs=[text_input_encoder],
488
+ )
489
+ inference_response = inference_request.exec()
490
+ if inference_response.has_error():
491
+ raise pb_utils.TritonModelException(
492
+ inference_response.error().message()
493
+ )
494
+
495
+ output = pb_utils.get_output_tensor_by_name(
496
+ inference_response, "last_hidden_state"
497
+ )
498
+ return output
499
+
500
+ def encode_prompt(
501
+ self,
502
+ prompt: str,
503
+ prompt_2: Optional[str] = None,
504
+ num_images_per_prompt: int = 1,
505
+ do_classifier_free_guidance: bool = True,
506
+ negative_prompt: Optional[str] = None,
507
+ negative_prompt_2: Optional[str] = None,
508
+ prompt_embeds: Optional[torch.Tensor] = None,
509
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
510
+ pooled_prompt_embeds: Optional[torch.Tensor] = None,
511
+ negative_pooled_prompt_embeds: Optional[torch.Tensor] = None,
512
+ clip_skip: Optional[int] = None,
513
+ ):
514
+ prompt = [prompt] if isinstance(prompt, str) else prompt
515
+
516
+ batch_size = len(prompt) if isinstance(prompt, list) else 1
517
+
518
+ if prompt_embeds is None:
519
+ # Prepare prompt_2
520
+ prompt_2 = prompt_2 or prompt
521
+ prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2
522
+ prompt_embeds_list = []
523
+
524
+
525
+ #Phrase 1 - prompt
526
+ text_input_ids = self.tokenizer_infer(prompt)
527
+ # untruncated_ids = self.tokenizer_infer(prompt)
528
+ prompt_embeds = self.text_encoder_infer(text_input_ids.cpu().numpy().astype(np.int32))
529
+ pooled_prompt_embeds = torch.from_numpy(prompt_embeds[0])
530
+ if clip_skip is None:
531
+ prompt_embeds = torch.from_numpy(prompt_embeds[-2])
532
+ else:
533
+ prompt_embeds = torch.from_numpy(prompt_embeds[-(clip_skip + 2)])
534
+ prompt_embeds_list.append(prompt_embeds)
535
+
536
+ #Phrase 2 - prompt
537
+ text_input_ids = self.tokenizer_2_infer(prompt_2)
538
+ # untruncated_ids = self.tokenizer_2_infer(prompt_2)
539
+ prompt_embeds = self.text_encoder_2_infer(text_input_ids.cpu().numpy().astype(np.int64))
540
+ pooled_prompt_embeds = torch.from_numpy(prompt_embeds[0])
541
+ if clip_skip is None:
542
+ prompt_embeds = torch.from_numpy(prompt_embeds[-2])
543
+ else:
544
+ prompt_embeds = torch.from_numpy(prompt_embeds[-(clip_skip + 2)])
545
+ prompt_embeds_list.append(prompt_embeds)
546
+ #End prompt
547
+
548
+ prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)
549
+ zero_out_negative_prompt = negative_prompt is None and getattr(self, "force_zeros_for_empty_prompt", True)  # no pipeline config object here; SDXL defaults this flag to True
550
+ if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
551
+ negative_prompt_embeds = torch.zeros_like(prompt_embeds)
552
+ negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
553
+ elif do_classifier_free_guidance and negative_prompt_embeds is None:
554
+ negative_prompt = negative_prompt or ""
555
+ negative_prompt_2 = negative_prompt_2 or negative_prompt
556
+
557
+ negative_prompt = batch_size * [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt
558
+ negative_prompt_2 = (
559
+ batch_size * [negative_prompt_2] if isinstance(negative_prompt_2, str) else negative_prompt_2
560
+ )
561
+
562
+ uncond_tokens: List[str]
563
+ if prompt is not None and type(prompt) is not type(negative_prompt):
564
+ raise TypeError(
565
+ f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
566
+ f" {type(prompt)}."
567
+ )
568
+ elif batch_size != len(negative_prompt):
569
+ raise ValueError(
570
+ f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
571
+ f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
572
+ " the batch size of `prompt`."
573
+ )
574
+ else:
575
+ uncond_tokens = [negative_prompt, negative_prompt_2]
576
+
577
+ #Phrase 1 - neg prompt
578
+ negative_prompt_embeds_list = []
579
+ max_length = prompt_embeds.shape[1]
580
+ uncond_input = self.tokenizer_infer(negative_prompt)
581
+ negative_prompt_embeds = self.text_encoder_infer(uncond_input.cpu().numpy().astype(np.int32))  # first text encoder for phrase 1; tokenizer_infer already returns the ids tensor
582
+ negative_pooled_prompt_embeds = torch.from_numpy(negative_prompt_embeds[0])
583
+ negative_prompt_embeds = torch.from_numpy(negative_prompt_embeds[-2])
584
+ negative_prompt_embeds_list.append(negative_prompt_embeds)
585
+
586
+ #Phrase 2 - neg prompt
587
+ max_length = prompt_embeds.shape[1]
588
+ uncond_input = self.tokenizer_2_infer(negative_prompt_2)
589
+ negative_prompt_embeds = self.text_encoder_2_infer(uncond_input.cpu().numpy().astype(np.int64))
590
+ negative_pooled_prompt_embeds = torch.from_numpy(negative_prompt_embeds[0])
591
+ negative_prompt_embeds = torch.from_numpy(negative_prompt_embeds[-2])
592
+ negative_prompt_embeds_list.append(negative_prompt_embeds)
593
+ #End phrase
594
+
595
+ negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
596
+
597
+ prompt_embeds = prompt_embeds.to(dtype=self.dtype, device=self.device)
598
+
599
+ bs_embed, seq_len, _ = prompt_embeds.shape
600
+ prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
601
+ prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)
602
+
603
+ if do_classifier_free_guidance:
604
+ seq_len = negative_prompt_embeds.shape[1]
605
+
606
+ negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.dtype, device=self.device)
607
+
608
+ negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
609
+ negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
610
+
611
+ pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
612
+ bs_embed * num_images_per_prompt, -1
613
+ )
614
+ if do_classifier_free_guidance:
615
+ negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
616
+ bs_embed * num_images_per_prompt, -1
617
+ )
618
+
619
+ return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
620
+
621
+
622
+
623
+ def prepare_extra_step_kwargs(self, generator, eta):
624
+ # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
625
+ # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
626
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
627
+ # and should be between [0, 1]
628
+
629
+ accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
630
+ extra_step_kwargs = {}
631
+ if accepts_eta:
632
+ extra_step_kwargs["eta"] = eta
633
+
634
+ # check if the scheduler accepts generator
635
+ accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
636
+ if accepts_generator:
637
+ extra_step_kwargs["generator"] = generator
638
+ return extra_step_kwargs
639
+
640
+ def randn_tensor(
641
+ self,
642
+ shape: Union[Tuple, List],
643
+ generator: Optional[Union[List["torch.Generator"], "torch.Generator"]] = None,
644
+ device: Optional["torch.device"] = None,
645
+ dtype: Optional["torch.dtype"] = None,
646
+ layout: Optional["torch.layout"] = None,
647
+ ):
648
+ """A helper function to create random tensors on the desired `device` with the desired `dtype`. When
649
+ passing a list of generators, you can seed each batch size individually. If CPU generators are passed, the tensor
650
+ is always created on the CPU.
651
+ """
652
+ # device on which tensor is created defaults to device
653
+ rand_device = device
654
+ batch_size = shape[0]
655
+
656
+ layout = layout or torch.strided
657
+ device = device or torch.device("cpu")
658
+
659
+ if generator is not None:
660
+ gen_device_type = generator.device.type if not isinstance(generator, list) else generator[0].device.type
661
+ if gen_device_type != device.type and gen_device_type == "cpu":
662
+ rand_device = "cpu"
663
+ if device != "mps":
664
+ self.logger.info(
665
+ f"The passed generator was created on 'cpu' even though a tensor on {device} was expected."
666
+ f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably"
667
+ f" slightly speed up this function by passing a generator that was created on the {device} device."
668
+ )
669
+ elif gen_device_type != device.type and gen_device_type == "cuda":
670
+ raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.")
671
+
672
+ if isinstance(generator, list):
673
+ shape = (1,) + shape[1:]
674
+ latents = [
675
+ torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype, layout=layout)
676
+ for i in range(batch_size)
677
+ ]
678
+ latents = torch.cat(latents, dim=0).to(device)
679
+ else:
680
+ latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype, layout=layout).to(device)
681
+
682
+ return latents
683
+
684
+ def check_inputs(
685
+ self,
686
+ prompt,
687
+ prompt_2,
688
+ height,
689
+ width,
690
+ callback_steps,
691
+ negative_prompt=None,
692
+ negative_prompt_2=None,
693
+ prompt_embeds=None,
694
+ negative_prompt_embeds=None,
695
+ pooled_prompt_embeds=None,
696
+ negative_pooled_prompt_embeds=None,
697
+ ip_adapter_image=None,
698
+ ip_adapter_image_embeds=None,
699
+ callback_on_step_end_tensor_inputs=None,
700
+ ):
701
+ if height % 8 != 0 or width % 8 != 0:
702
+ raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
703
+
704
+ if callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0):
705
+ raise ValueError(
706
+ f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
707
+ f" {type(callback_steps)}."
708
+ )
709
+
710
+ if callback_on_step_end_tensor_inputs is not None and not all(
711
+ k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
712
+ ):
713
+ raise ValueError(
714
+ f"`callback_on_step_end_tensor_inputs` has to be in {self._callback_tensor_inputs}, but found {[k for k in callback_on_step_end_tensor_inputs if k not in self._callback_tensor_inputs]}"
715
+ )
716
+
717
+ if prompt is not None and prompt_embeds is not None:
718
+ raise ValueError(
719
+ f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
720
+ " only forward one of the two."
721
+ )
722
+ elif prompt_2 is not None and prompt_embeds is not None:
723
+ raise ValueError(
724
+ f"Cannot forward both `prompt_2`: {prompt_2} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
725
+ " only forward one of the two."
726
+ )
727
+ elif prompt is None and prompt_embeds is None:
728
+ raise ValueError(
729
+ "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
730
+ )
731
+ elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
732
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
733
+ elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
734
+ raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
735
+
736
+ if negative_prompt is not None and negative_prompt_embeds is not None:
737
+ raise ValueError(
738
+ f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
739
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
740
+ )
741
+ elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
742
+ raise ValueError(
743
+ f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
744
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
745
+ )
746
+
747
+ if prompt_embeds is not None and negative_prompt_embeds is not None:
748
+ if prompt_embeds.shape != negative_prompt_embeds.shape:
749
+ raise ValueError(
750
+ "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
751
+ f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
752
+ f" {negative_prompt_embeds.shape}."
753
+ )
754
+
755
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
756
+ raise ValueError(
757
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
758
+ )
759
+
760
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
761
+ raise ValueError(
762
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
763
+ )
764
+
765
+ if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
766
+ raise ValueError(
767
+ "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
768
+ )
769
+
770
+ if ip_adapter_image_embeds is not None:
771
+ if not isinstance(ip_adapter_image_embeds, list):
772
+ raise ValueError(
773
+ f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
774
+ )
775
+ elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
776
+ raise ValueError(
777
+ f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
778
+ )
779
+
780
+
781
+ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
782
+ shape = (
783
+ batch_size,
784
+ num_channels_latents,
785
+ int(height) // self.vae_scale_factor,
786
+ int(width) // self.vae_scale_factor,
787
+ )
788
+ if isinstance(generator, list) and len(generator) != batch_size:
789
+ raise ValueError(
790
+ f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
791
+ f" size of {batch_size}. Make sure the batch size matches the length of the generators."
792
+ )
793
+
794
+ if latents is None:
795
+ latents = self.randn_tensor(shape, generator=generator, device=device, dtype=dtype)
796
+ else:
797
+ latents = latents.to(device)
798
+
799
+ latents = latents * self.scheduler.init_noise_sigma
800
+ return latents
801
+
802
+
803
+ def _get_add_time_ids(
804
+ self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None
805
+ ):
806
+ add_time_ids = list(original_size + crops_coords_top_left + target_size)
807
+ passed_add_embed_dim = (
808
+ self.unet_configs['addition_time_embed_dim'] * len(add_time_ids) + text_encoder_projection_dim
809
+ )
810
+ expected_add_embed_dim = self.unet_configs['projection_class_embeddings_input_dim']
811
+
812
+ if expected_add_embed_dim != passed_add_embed_dim:
813
+ raise ValueError(
814
+ f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
815
+ )
816
+
817
+ add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
818
+ return add_time_ids
819
+
820
+
821
+ def get_guidance_scale_embedding(
822
+ self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
823
+ ) -> torch.Tensor:
824
+ assert len(w.shape) == 1
825
+ w = w * 1000.0
826
+
827
+ half_dim = embedding_dim // 2
828
+ emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
829
+ emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
830
+ emb = w.to(dtype)[:, None] * emb[None, :]
831
+ emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
832
+ if embedding_dim % 2 == 1: # zero pad
833
+ emb = torch.nn.functional.pad(emb, (0, 1))
834
+ assert emb.shape == (w.shape[0], embedding_dim)
835
+ return emb
836
+
837
+
838
+ def read_json(self, json_path):
839
+ with open(json_path, 'r') as f:
840
+ data = json.load(f)
841
+ return data
842
+
843
+ def rescale_noise_cfg(self, noise_cfg, noise_pred_text, guidance_rescale=0.0):
844
+ """
845
+ Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
846
+ Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
847
+ """
848
+ std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
849
+ std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
850
+ # rescale the results from guidance (fixes overexposure)
851
+ noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
852
+ # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
853
+ noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
854
+ return noise_cfg
855
+
856
+ def retrieve_timesteps(
857
+ self,
858
+ scheduler,
859
+ num_inference_steps: Optional[int] = None,
860
+ device: Optional[Union[str, torch.device]] = None,
861
+ timesteps: Optional[List[int]] = None,
862
+ sigmas: Optional[List[float]] = None,
863
+ **kwargs,
864
+ ):
865
+ """
866
+ Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
867
+ custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
868
+
869
+ Args:
870
+ scheduler (`SchedulerMixin`):
871
+ The scheduler to get timesteps from.
872
+ num_inference_steps (`int`):
873
+ The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
874
+ must be `None`.
875
+ device (`str` or `torch.device`, *optional*):
876
+ The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
877
+ timesteps (`List[int]`, *optional*):
878
+ Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
879
+ `num_inference_steps` and `sigmas` must be `None`.
880
+ sigmas (`List[float]`, *optional*):
881
+ Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
882
+ `num_inference_steps` and `timesteps` must be `None`.
883
+
884
+ Returns:
885
+ `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
886
+ second element is the number of inference steps.
887
+ """
888
+ if timesteps is not None and sigmas is not None:
889
+ raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
890
+ if timesteps is not None:
891
+ accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
892
+ if not accepts_timesteps:
893
+ raise ValueError(
894
+ f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
895
+ f" timestep schedules. Please check whether you are using the correct scheduler."
896
+ )
897
+ scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
898
+ timesteps = scheduler.timesteps
899
+ num_inference_steps = len(timesteps)
900
+ elif sigmas is not None:
901
+ accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
902
+ if not accept_sigmas:
903
+ raise ValueError(
904
+ f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
905
+ f" sigmas schedules. Please check whether you are using the correct scheduler."
906
+ )
907
+ scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
908
+ timesteps = scheduler.timesteps
909
+ num_inference_steps = len(timesteps)
910
+ else:
911
+ scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
912
+ timesteps = scheduler.timesteps
913
+ return timesteps, num_inference_steps
914
+
915
+
916
+ def execute(self, requests) -> "List[List[pb_utils.Tensor]]":
917
+
918
+ responses = []
919
+
920
+ for request in requests:
921
+ # client send binary data typed - convert back to string
922
+ prompt = [
923
+ t.decode("UTF-8")
924
+ for t in pb_utils.get_input_tensor_by_name(request, "PROMPT")
925
+ .as_numpy()
926
+ .tolist()
927
+ ]
928
+ prompt_2 = [
929
+ t.decode("UTF-8")
930
+ for t in pb_utils.get_input_tensor_by_name(request, "PROMPT_2")
931
+ .as_numpy()
932
+ .tolist()
933
+ ]
934
+ negative_prompt = [
935
+ t.decode("UTF-8")
936
+ for t in pb_utils.get_input_tensor_by_name(request, "NEGATIVE_PROMPT")
937
+ .as_numpy()
938
+ .tolist()
939
+ ]
940
+ negative_prompt_2 = [
941
+ t.decode("UTF-8")
942
+ for t in pb_utils.get_input_tensor_by_name(request, "NEGATIVE_PROMPT_2")
943
+ .as_numpy()
944
+ .tolist()
945
+ ]
946
+ cnext_model_name = [
947
+ t.decode("UTF-8")
948
+ for t in pb_utils.get_input_tensor_by_name(request, "CNEXT_MODEL_NAME")
949
+ .as_numpy()
950
+ .tolist()
951
+ ][0]
952
+ controlnet_image = [
953
+ t
954
+ for t in pb_utils.get_input_tensor_by_name(request, "CNEXT_IMAGE")
955
+ .as_numpy()
956
+ .tolist()
957
+ ]
958
+ ip_adapter_image = [
959
+ t
960
+ for t in pb_utils.get_input_tensor_by_name(request, "IP_ADAPTER_IMAGE")
961
+ .as_numpy()
962
+ .tolist()
963
+ ]
964
+ self.guidance_scale = [
965
+ t
966
+ for t in pb_utils.get_input_tensor_by_name(request, "GUIDANCE_SCALE")
967
+ .as_numpy()
968
+ .tolist()
969
+ ][0]
970
+ self.num_inference_steps = [
971
+ t
972
+ for t in pb_utils.get_input_tensor_by_name(request, "STEPS")
973
+ .as_numpy()
974
+ .tolist()
975
+ ][0]
976
+ self.control_scale = [
977
+ t
978
+ for t in pb_utils.get_input_tensor_by_name(request, "CNEXT_CONDITIONAL_SCALE")
979
+ .as_numpy()
980
+ .tolist()
981
+ ][0]
982
+ width = [
983
+ t
984
+ for t in pb_utils.get_input_tensor_by_name(request, "WIDTH")
985
+ .as_numpy()
986
+ .tolist()
987
+ ]
988
+ height = [
989
+ t
990
+ for t in pb_utils.get_input_tensor_by_name(request, "HEIGHT")
991
+ .as_numpy()
992
+ .tolist()
993
+ ]
994
+ seed = [
995
+ t
996
+ for t in pb_utils.get_input_tensor_by_name(request, "SEED")
997
+ .as_numpy()
998
+ .tolist()
999
+ ][0]
1000
+
1001
+ #Check data input from user
1002
+ self.logger.log_info(f"############ GET REQUEST ############")
1003
+ self.logger.log_info(f"Prompt: {prompt}")
1004
+ self.logger.log_info(f"Prompt_2: {prompt_2}")
1005
+ self.logger.log_info(f"Neg-Prompt: {negative_prompt}")
1006
+ self.logger.log_info(f"Neg-Prompt_2: {negative_prompt_2}")
1007
+ self.logger.log_info(f"Cnext model name: {cnext_model_name}")
1008
+ self.logger.log_info(f"Cnext image shape: {np.asarray(controlnet_image).shape}")
1009
+ self.logger.log_info(f"IP image shape: {np.asarray(ip_adapter_image).shape}")
1010
+
1011
+ self.logger.log_info(f"guidance_scale: {self.guidance_scale}")
1012
+ self.logger.log_info(f"num_inference_steps: {self.num_inference_steps}")
1013
+ self.logger.log_info(f"control_scale: {self.control_scale}")
1014
+ self.logger.log_info(f"width: {width}")
1015
+ self.logger.log_info(f"height: {height}")
1016
+ self.logger.log_info(f"seed: {seed}")
1017
+
1018
+
1019
+ height = int(height[0]) if height else self.default_sample_size * self.vae_scale_factor  # HEIGHT arrives as a one-element list
1020
+ width = int(width[0]) if width else self.default_sample_size * self.vae_scale_factor
1021
+
1022
+ original_size = self.original_size or (height, width)
1023
+ target_size = self.target_size or (height, width)
1024
+
1025
+ self.logger.log_info(f"############ Process ############")
1026
+ self.logger.log_info(f"width: {width}")
1027
+ self.logger.log_info(f"height: {height}")
1028
+
1029
+ # self._guidance_scale = self.guidance_scale
1030
+ # self._guidance_rescale = self.guidance_rescale
1031
+ # self._clip_skip = self.clip_skip
1032
+ # self._denoising_end = self.denoising_end
1033
+ # self._interrupt = False
1034
+
1035
+ if isinstance(prompt, str):
1036
+ batch_size = 1
1037
+ elif isinstance(prompt, list):
1038
+ batch_size = len(prompt)
1039
+ else:
1040
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
1041
+
1042
+ (
1043
+ prompt_embeds,
1044
+ negative_prompt_embeds,
1045
+ pooled_prompt_embeds,
1046
+ negative_pooled_prompt_embeds,
1047
+ ) = self.encode_prompt(
1048
+ prompt=prompt,
1049
+ prompt_2=prompt_2,
1050
+ num_images_per_prompt=self.num_images_per_prompt,
1051
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
1052
+ negative_prompt=negative_prompt,
1053
+ negative_prompt_2=negative_prompt_2,
1054
+ prompt_embeds=prompt_embeds,
1055
+ negative_prompt_embeds=negative_prompt_embeds,
1056
+ pooled_prompt_embeds=pooled_prompt_embeds,
1057
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
1058
+ clip_skip=self.clip_skip,
1059
+ )
1060
+
1061
+ timesteps, num_inference_steps = self.retrieve_timesteps(
1062
+ self.scheduler, self.num_inference_steps, self.device, self.timesteps, self.sigmas
1063
+ )
1064
+ num_channels_latents = self.unet_configs['in_channels']
1065
+
1066
+ random_seed = np.random.RandomState(seed) if seed > 0 else np.random
1067
+ torch_seed = random_seed.randint(2147483647)
1068
+ torch_gen = torch.Generator().manual_seed(torch_seed)
1069
+ generator=torch_gen
1070
+
1071
+ latents = self.prepare_latents(
1072
+ batch_size * self.num_images_per_prompt,
1073
+ num_channels_latents,
1074
+ height,
1075
+ width,
1076
+ prompt_embeds.dtype,
1077
+ self.device,
1078
+ generator,
1079
+ latents,
1080
+ )
1081
+
1082
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, self.eta)
1083
+
1084
+ add_text_embeds = pooled_prompt_embeds
1085
+ if self.text_encoder_2_configs is None:  # projection dim comes from the loaded config, not an in-process text encoder
1086
+ text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
1087
+ else:
1088
+ text_encoder_projection_dim = self.text_encoder_2_configs['projection_dim']
1089
+ add_time_ids = self._get_add_time_ids(
1090
+ original_size,
1091
+ self.crops_coords_top_left,
1092
+ target_size,
1093
+ dtype=prompt_embeds.dtype,
1094
+ text_encoder_projection_dim=text_encoder_projection_dim,
1095
+ )
1096
+
1097
+ if self.negative_original_size is not None and self.negative_target_size is not None:
1098
+ negative_add_time_ids = self._get_add_time_ids(
1099
+ self.negative_original_size,
1100
+ self.negative_crops_coords_top_left,
1101
+ self.negative_target_size,
1102
+ dtype=prompt_embeds.dtype,
1103
+ text_encoder_projection_dim=text_encoder_projection_dim,
1104
+ )
1105
+ else:
1106
+ negative_add_time_ids = add_time_ids
1107
+
1108
+ if ip_adapter_image is not None:
1109
+ #Call CLIPImageProcessor model.py
1110
+ # clip_image = self.clip_image_processor(images=ip_adapter_image, return_tensors="pt").pixel_values
1111
+ input_image_clip = Image.fromarray(np.asarray(ip_adapter_image, dtype=np.uint8))  # IP_ADAPTER_IMAGE arrives as a nested list
1112
+ clip_image_processor_input = pb_utils.Tensor("IP_ADAPTER_IMAGE", np.asarray(input_image_clip))
1113
+ inference_request = pb_utils.InferenceRequest(
1114
+ model_name="clip_image_processor",
1115
+ requested_output_names=["pixel_values"],
1116
+ inputs=[clip_image_processor_input],
1117
+ )
1118
+ inference_response = inference_request.exec()
1119
+ if inference_response.has_error():
1120
+ raise pb_utils.TritonModelException(
1121
+ inference_response.error().message()
1122
+ )
1123
+ else:
1124
+ clip_image = pb_utils.get_output_tensor_by_name(
1125
+ inference_response, "pixel_values"
1126
+ )
1127
+ clip_image: torch.Tensor = torch.from_dlpack(clip_image.to_dlpack())
1128
+
1129
+ clip_image = clip_image.to(self.device, dtype=torch.float32)
1130
+
1131
+ #Call Image_encoder model.onnx
1132
+ # clip_image_embeds = self.image_encoder.run(None, {'image_embedding': clip_image[0].unsqueeze(0).cpu().numpy()})
1133
+ image_encoder_input = clip_image[0].unsqueeze(0).cpu().numpy()
1134
+ image_encoder_input_triton = pb_utils.Tensor("IMAGE_EMBEDDING", image_encoder_input)
1135
+ inference_request = pb_utils.InferenceRequest(
1136
+ model_name="image_encoder",
1137
+ requested_output_names=["image_encoder"],
1138
+ inputs=[image_encoder_input_triton],
1139
+ )
1140
+ inference_response = inference_request.exec()
1141
+ if inference_response.has_error():
1142
+ raise pb_utils.TritonModelException(
1143
+ inference_response.error().message()
1144
+ )
1145
+ else:
1146
+ clip_image_embeds = pb_utils.get_output_tensor_by_name(
1147
+ inference_response, "image_encoder"
1148
+ )
1149
+ clip_image_embeds: torch.Tensor = torch.from_dlpack(clip_image_embeds.to_dlpack())
1150
+
1151
+
1152
+ #Call Proj model.onnx
1153
+ # image_prompt_embeds = self.image_proj.run(None, {'clip_image_embeds': clip_image_embeds[0].astype(np.float32)})
1154
+ proj_image_input = clip_image_embeds[0].cpu().numpy().astype(np.float32)
1155
+ proj_image_input_triton = pb_utils.Tensor("CLIP_IMAGE_EMBEDS", proj_image_input)
1156
+ inference_request = pb_utils.InferenceRequest(
1157
+ model_name="proj",
1158
+ requested_output_names=["image_prompt_embeds"],
1159
+ inputs=[proj_image_input_triton],
1160
+ )
1161
+ inference_response = inference_request.exec()
1162
+ if inference_response.has_error():
1163
+ raise pb_utils.TritonModelException(
1164
+ inference_response.error().message()
1165
+ )
1166
+ else:
1167
+ image_prompt_embeds = pb_utils.get_output_tensor_by_name(
1168
+ inference_response, "image_prompt_embeds"
1169
+ )
1170
+ image_prompt_embeds: torch.Tensor = torch.from_dlpack(image_prompt_embeds.to_dlpack())
1171
+
1172
+ #Call Proj model.onnx
1173
+ # uncond_image_prompt_embeds = self.image_proj.run(None, {'clip_image_embeds': torch.zeros_like(torch.tensor(clip_image_embeds[0])).cpu().numpy().astype(np.float32)})
1174
+ proj_uncond_image_input = torch.zeros_like(clip_image_embeds[0]).cpu().numpy().astype(np.float32)  # zero embedding for the unconditional branch, per the commented-out original
1175
+ proj_uncond_image_input_triton = pb_utils.Tensor("CLIP_IMAGE_EMBEDS", proj_uncond_image_input)
1176
+ inference_request = pb_utils.InferenceRequest(
1177
+ model_name="proj",
1178
+ requested_output_names=["image_prompt_embeds"],
1179
+ inputs=[proj_uncond_image_input_triton],
1180
+ )
1181
+ inference_response = inference_request.exec()
1182
+ if inference_response.has_error():
1183
+ raise pb_utils.TritonModelException(
1184
+ inference_response.error().message()
1185
+ )
1186
+ else:
1187
+ uncond_image_prompt_embeds = pb_utils.get_output_tensor_by_name(
1188
+ inference_response, "image_prompt_embeds"
1189
+ )
1190
+ uncond_image_prompt_embeds: torch.Tensor = torch.from_dlpack(uncond_image_prompt_embeds.to_dlpack())
1191
+
1192
+
1193
+ image_prompt_embeds = image_prompt_embeds.to(self.device)  # already a torch tensor via DLPack
1194
+ uncond_image_prompt_embeds = uncond_image_prompt_embeds.to(self.device)
1195
+
1196
+ bs_embed, seq_len, _ = image_prompt_embeds.shape
1197
+ image_prompt_embeds = image_prompt_embeds.repeat(1, self.num_images_per_prompt, 1)
1198
+ image_prompt_embeds = image_prompt_embeds.view(bs_embed * self.num_images_per_prompt, seq_len, -1)
1199
+ uncond_image_prompt_embeds = uncond_image_prompt_embeds.repeat(1, self.num_images_per_prompt, 1)
1200
+ uncond_image_prompt_embeds = uncond_image_prompt_embeds.view(bs_embed * self.num_images_per_prompt, seq_len, -1)
1201
+ prompt_embeds = torch.cat([torch.zeros_like(prompt_embeds), image_prompt_embeds], dim=1)
1202
+ negative_prompt_embeds = torch.cat([torch.zeros_like(negative_prompt_embeds), uncond_image_prompt_embeds], dim=1)
1203
+
1204
+ if self.do_classifier_free_guidance:
1205
+ prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
1206
+ add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
1207
+ add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
1208
+
1209
+ prompt_embeds = prompt_embeds.to(self.device)
1210
+ add_text_embeds = add_text_embeds.to(self.device)
1211
+ add_time_ids = add_time_ids.to(self.device).repeat(batch_size * self.num_images_per_prompt, 1)
1212
+
1213
+ if controlnet_image is not None and self.controlnet is not None:
1214
+ prepare_image_input = Image.fromarray(controlnet_image)
1215
+ controlnet_image = self.prepare_image(
1216
+ prepare_image_input,
1217
+ width,
1218
+ height,
1219
+ batch_size,
1220
+ self.num_images_per_prompt,
1221
+ self.device,
1222
+ self.dtype,
1223
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
1224
+ )
1225
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
1226
+
1227
+ if (
1228
+ self.denoising_end is not None
1229
+ and isinstance(self.denoising_end, float)
1230
+ and self.denoising_end > 0
1231
+ and self.denoising_end < 1
1232
+ ):
1233
+ discrete_timestep_cutoff = int(
1234
+ round(
1235
+ self.scheduler.config.num_train_timesteps
1236
+ - (self.denoising_end * self.scheduler.config.num_train_timesteps)
1237
+ )
1238
+ )
1239
+ num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
1240
+ timesteps = timesteps[:num_inference_steps]
1241
+
1242
+
1243
+
1244
+ self._num_timesteps = len(timesteps)
1245
+ for i, t in enumerate(timesteps):
1246
+ print('Step:', i)
1247
+ if self.interrupt:
1248
+ continue
1249
+
1250
+ latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
1251
+
1252
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
1253
+
1254
+ #Call cnext model.onnx
1255
+ # controls = self.controlnet.run(None, {'controlnext_image': controlnet_image.cpu().numpy(),
1256
+ # 'timestep': t.unsqueeze(0).cpu().numpy().astype(np.float32),})
1257
+
1258
+ input_cnext = [
1259
+ pb_utils.Tensor("controlnext_image", controlnet_image.cpu().numpy().astype(np.float32)),
1260
+ pb_utils.Tensor("timestep", t.unsqueeze(0).cpu().numpy().astype(np.float32)),
1261
+ ]
1262
+
1263
+ inference_request = pb_utils.InferenceRequest(
1264
+ model_name=cnext_model_name,
1265
+ requested_output_names=['sample'],
1266
+ inputs=input_cnext,
1267
+ )
1268
+ inference_response = inference_request.exec()
1269
+ if inference_response.has_error():
1270
+ raise pb_utils.TritonModelException(
1271
+ inference_response.error().message())
1272
+ else:
1273
+ controls = pb_utils.get_output_tensor_by_name(
1274
+ inference_response, "sample"
1275
+ )
1276
+ controls: torch.Tensor = torch.from_dlpack(controls.to_dlpack())
1277
+
1278
+ scale = torch.tensor([self.control_scale])
1279
+
1280
+
1281
+ #Call the unet model.onnx
1282
+ input_unet = [
1283
+ pb_utils.Tensor("control_out", latent_model_input.cpu().numpy().astype(np.float32)),
1284
+ pb_utils.Tensor("timestep", t.unsqueeze(0).cpu().numpy().astype(np.float32)),
1285
+ pb_utils.Tensor("encoder_hidden_state", prompt_embeds.cpu().numpy().astype(np.float32)),
1286
+ pb_utils.Tensor("control_out", controls[0].astype(np.float32)),
1287
+ pb_utils.Tensor("control_scale", scale.cpu().numpy().astype(np.float32))
1288
+ ]
1289
+
1290
+ inference_request = pb_utils.InferenceRequest(
1291
+ model_name="unet",  # this request targets the unet model, not the ControlNeXt model
1292
+ requested_output_names=['predict_noise'],
1293
+ inputs=input_unet,
1294
+ )
1295
+ inference_response = inference_request.exec()
1296
+ if inference_response.has_error():
1297
+ raise pb_utils.TritonModelException(
1298
+ inference_response.error().message())
1299
+ else:
1300
+ noise_pred = pb_utils.get_output_tensor_by_name(
1301
+ inference_response, "predict_noise"
1302
+ )
1303
+ noise_pred: torch.Tensor = torch.from_dlpack(noise_pred.to_dlpack())
1304
+
1305
+ noise_pred = noise_pred.to(self.device)  # already a torch tensor from DLPack
1306
+
1307
+ # perform guidance
1308
+ if self.do_classifier_free_guidance:
1309
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
1310
+ noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
1311
+
1312
+ if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
1313
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1314
+ noise_pred = self.rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
1315
+
1316
+
1317
+ latents_dtype = latents.dtype
1318
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
1319
+ if latents.dtype != latents_dtype:
1320
+ if torch.backends.mps.is_available():
1321
+ # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
1322
+ latents = latents.to(latents_dtype)
1323
+
1324
+ if self.callback_on_step_end is not None:
1325
+ callback_kwargs = {}
1326
+ for k in self.callback_on_step_end_tensor_inputs:
1327
+ callback_kwargs[k] = locals()[k]
1328
+ callback_outputs = self.callback_on_step_end(self, i, t, callback_kwargs)
1329
+
1330
+ latents = callback_outputs.pop("latents", latents)
1331
+ prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
1332
+ negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
1333
+ add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds)
1334
+ negative_pooled_prompt_embeds = callback_outputs.pop(
1335
+ "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds
1336
+ )
1337
+ add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids)
1338
+ negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids)
1339
+
1340
+ if not self.output_type == "latent":
1341
+ '''needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
1342
+
1343
+ if needs_upcasting:
1344
+ self.upcast_vae()
1345
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
1346
+ elif latents.dtype != self.vae.dtype:
1347
+ if torch.backends.mps.is_available():
1348
+ # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
1349
+ self.vae = self.vae.to(latents.dtype)'''
1350
+
1351
+ has_latents_mean = "latents_mean" in self.vae_configs and self.vae_configs['latents_mean'] is not None
1352
+ has_latents_std = "latents_std" in self.vae_configs and self.vae_configs['latents_std'] is not None
1353
+ if has_latents_mean and has_latents_std:
1354
+ latents_mean = (
1355
+ torch.tensor(self.vae_configs['latents_mean']).view(1, 4, 1, 1).to(latents.device, latents.dtype)
1356
+ )
1357
+ latents_std = (
1358
+ torch.tensor(self.vae_configs['latents_std']).view(1, 4, 1, 1).to(latents.device, latents.dtype)
1359
+ )
1360
+ latents = latents * latents_std / self.vae_configs['scaling_factor'] + latents_mean
1361
+ else:
1362
+ latents = latents / self.vae_configs['scaling_factor']
1363
+ #Call VAE model.onnx
1364
+ # image = self.vae.run(None, {'latent_sample': latents.cpu().numpy()})[0]
1365
+ input_vae = [
1366
+ pb_utils.Tensor(  # plain Tensor: a numpy array (not a DLPack capsule) is passed here
1367
+ "latent_sample", latents.cpu().numpy().astype(np.float32)
1368
+ )
1369
+ ]
1370
+ self.logger.log_warn(f"latent_sample for vae: {latents.shape}")
1371
+ self.logger.log_warn(f"latent_sample for vae: {type(latents)}")
1372
+
1373
+ inference_request = pb_utils.InferenceRequest(
1374
+ model_name="vae_decode",
1375
+ requested_output_names=["sample"],
1376
+ inputs=input_vae,
1377
+ )
1378
+ inference_response = inference_request.exec()
1379
+ if inference_response.has_error():
1380
+ raise pb_utils.TritonModelException(
1381
+ inference_response.error().message()
1382
+ )
1383
+ else:
1384
+ output = pb_utils.get_output_tensor_by_name(inference_response, "sample")
1385
+ image: torch.Tensor = torch.from_dlpack(output.to_dlpack())
1386
+ else:
1387
+ image = latents
1388
+
1389
+ image = image.cpu().numpy().astype(np.float32)
1390
+
1391
+ tensor_output = [pb_utils.Tensor("IMAGES", image)]
1392
+ responses.append(pb_utils.InferenceResponse(tensor_output))
1393
+ return responses
1394
+
1395
+ def finalize(self) -> None:
1396
+ """
1397
+ Called when the model is being unloaded from memory.
1398
+ """
1399
+ pass
1400
+
1401
+
1402
+
1403
+
1404
+
1405
+
1406
+
1407
+
1408
+
1409
+
1410
+
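The request/response pattern above repeats for every backend model (image_encoder, proj, cnext, unet, vae_decoder). A minimal helper sketch of that pattern, with a hypothetical name (bls_infer) that is not part of this upload; it assumes it runs inside the Triton Python backend, where pb_utils and torch are available:

import numpy as np
import torch
import triton_python_backend_utils as pb_utils

def bls_infer(model_name, named_arrays, output_name):
    """Run one BLS call and return the requested output as a torch tensor."""
    inputs = [pb_utils.Tensor(name, np.ascontiguousarray(arr)) for name, arr in named_arrays.items()]
    request = pb_utils.InferenceRequest(
        model_name=model_name,
        requested_output_names=[output_name],
        inputs=inputs,
    )
    response = request.exec()
    if response.has_error():
        raise pb_utils.TritonModelException(response.error().message())
    out = pb_utils.get_output_tensor_by_name(response, output_name)
    # DLPack conversion yields a torch tensor; convert to numpy explicitly if needed.
    return torch.from_dlpack(out.to_dlpack())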
Triton/bls_sdxl_cnext_ip_anime/config.pbtxt ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "bls_sdxl_cnext_ip_anime"
2
+ max_batch_size: 0
3
+ backend: "python"
4
+
5
+ input [
6
+ {
7
+ name: "PROMPT"
8
+ data_type: TYPE_STRING
9
+ dims: [ -1 ]
10
+ },
11
+ {
12
+ name: "PROMPT_2"
13
+ data_type: TYPE_STRING
14
+ dims: [ -1 ]
15
+ },
16
+ {
17
+ name: "NEGATIVE_PROMPT"
18
+ data_type: TYPE_STRING
19
+ dims: [ -1 ]
20
+ },
21
+ {
22
+ name: "NEGATIVE_PROMPT_2"
23
+ data_type: TYPE_STRING
24
+ dims: [ -1 ]
25
+ },
26
+ {
27
+ name: "CNEXT_MODEL_NAME"
28
+ data_type: TYPE_STRING
29
+ dims: [ -1 ]
30
+ },
31
+ {
32
+ name: "CNEXT_IMAGE"
33
+ data_type: TYPE_FP32
34
+ dims: [-1, -1, 3]
35
+ },
36
+ {
37
+ name: "IP_ADAPTER_IMAGE"
38
+ data_type: TYPE_FP32
39
+ dims: [-1, -1, 3]
40
+ },
41
+ {
42
+ name: "GUIDANCE_SCALE"
43
+ data_type: TYPE_FP32
44
+ dims: [ -1 ]
45
+ },
46
+ {
47
+ name: "STEPS"
48
+ data_type: TYPE_INT32
49
+ dims: [ -1 ]
50
+ },
51
+ {
52
+ name: "CNEXT_CONDITIONAL_SCALE"
53
+ data_type: TYPE_FP32
54
+ dims: [ -1 ]
55
+ },
56
+ {
57
+ name: "SEED"
58
+ data_type: TYPE_INT64
59
+ dims: [ -1 ]
60
+ },
61
+ {
62
+ name: "HEIGHT"
63
+ data_type: TYPE_INT32 # INT8 cannot represent values like 1024; use a 32-bit integer for pixel dimensions
64
+ dims: [ -1 ]
65
+ },
66
+ {
67
+ name: "WIDTH"
68
+ data_type: TYPE_INT32
69
+ dims: [ -1 ]
70
+ }
71
+ ]
72
+
73
+ output [
74
+ {
75
+ name: "IMAGES"
76
+ data_type: TYPE_FP32
77
+ dims: [ -1 , -1, -1, -1]
78
+ }
79
+ ]
80
+
81
+ instance_group [
82
+ {
83
+ kind: KIND_CPU
84
+ }
85
+ ]
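A hedged client-side sketch for exercising this BLS model, assuming a Triton HTTP endpoint on localhost:8000 and 1024x1024 conditioning images; prompt strings and scales are placeholders. Input names and dtypes follow the config above (HEIGHT/WIDTH as 32-bit ints, per the data_type note).

import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

def text_input(name, value):
    arr = np.array([value.encode("utf-8")], dtype=np.object_)
    inp = httpclient.InferInput(name, arr.shape, "BYTES")
    inp.set_data_from_numpy(arr)
    return inp

def array_input(name, arr, dtype_str):
    inp = httpclient.InferInput(name, arr.shape, dtype_str)
    inp.set_data_from_numpy(arr)
    return inp

cnext_image = np.zeros((1024, 1024, 3), dtype=np.float32)   # e.g. a canny map
ip_image = np.zeros((1024, 1024, 3), dtype=np.float32)      # IP-Adapter reference image

inputs = [
    text_input("PROMPT", "1girl, masterpiece"),
    text_input("PROMPT_2", "1girl, masterpiece"),
    text_input("NEGATIVE_PROMPT", "lowres"),
    text_input("NEGATIVE_PROMPT_2", "lowres"),
    text_input("CNEXT_MODEL_NAME", "cnext_canny_anime"),
    array_input("CNEXT_IMAGE", cnext_image, "FP32"),
    array_input("IP_ADAPTER_IMAGE", ip_image, "FP32"),
    array_input("GUIDANCE_SCALE", np.array([5.0], dtype=np.float32), "FP32"),
    array_input("STEPS", np.array([20], dtype=np.int32), "INT32"),
    array_input("CNEXT_CONDITIONAL_SCALE", np.array([1.0], dtype=np.float32), "FP32"),
    array_input("SEED", np.array([42], dtype=np.int64), "INT64"),
    array_input("HEIGHT", np.array([1024], dtype=np.int32), "INT32"),
    array_input("WIDTH", np.array([1024], dtype=np.int32), "INT32"),
]

result = client.infer(
    "bls_sdxl_cnext_ip_anime",
    inputs,
    outputs=[httpclient.InferRequestedOutput("IMAGES")],
)
images = result.as_numpy("IMAGES")  # [batch, 3, H, W] float32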
Triton/cnext_canny_anime/config.pbtxt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "cnext_canny_anime"
2
+ backend: "onnxruntime"
3
+ max_batch_size: 0
4
+
5
+ input [
6
+ {
7
+ name: "timestep"
8
+ data_type: TYPE_FP32
9
+ dims: [1] # Fixed batch size
10
+ },
11
+ {
12
+ name: "controlnext_image"
13
+ data_type: TYPE_FP32
14
+ dims: [-1, 3, -1, -1] # Dynamic batch size, 3 channels, dynamic height, width
15
+ }
16
+ ]
17
+
18
+ instance_group [
19
+ {
20
+ kind: KIND_CPU
21
+ }
22
+ ]
23
+
24
+ optimization { execution_accelerators {
25
+ cpu_execution_accelerator : [ {
26
+ name : "openvino"
27
+ }]
28
+ }}
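This config omits an output block, so the ONNX Runtime backend auto-completes outputs from the model file; the BLS pipeline requests an output named "sample". A quick hedged check of the exported graph (the model path is assumed, adjust to where the .onnx file actually lives):

import onnx

model = onnx.load("Triton/cnext_canny_anime/1/model.onnx")  # assumed path
print("inputs:", [i.name for i in model.graph.input])
print("outputs:", [o.name for o in model.graph.output])     # expect 'sample' here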
Triton/image_encoder/config.pbtxt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "image_encoder"
2
+ backend: "onnxruntime"
3
+ max_batch_size: 0
4
+
5
+ input [
6
+ {
7
+ name: "image_embedding"
8
+ data_type: TYPE_FP32
9
+ dims: [-1, -1, -1, -1]
10
+ }
11
+ ]
12
+
13
+ output [
14
+ {
15
+ name: "image_encoder"
16
+ data_type: TYPE_FP32
17
+ dims: [-1, 1280]
18
+ }
19
+ ]
20
+
21
+ instance_group [
22
+ {
23
+ kind: KIND_CPU
24
+ }
25
+ ]
26
+
27
+ optimization { execution_accelerators {
28
+ cpu_execution_accelerator : [ {
29
+ name : "openvino"
30
+ }]
31
+ }}
Triton/proj/config.pbtxt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "proj"
2
+ backend: "onnxruntime"
3
+ max_batch_size: 0
4
+
5
+ input [
6
+ {
7
+ name: "clip_image_embeds"
8
+ data_type: TYPE_FP32
9
+ dims: [-1, -1]
10
+ }
11
+ ]
12
+
13
+ output [
14
+ {
15
+ name: "image_prompt_embeds"
16
+ data_type: TYPE_FP32
17
+ dims: [-1, 4, 2048]
18
+ }
19
+ ]
20
+
21
+ instance_group [
22
+ {
23
+ kind: KIND_CPU
24
+ }
25
+ ]
26
+
27
+ optimization { execution_accelerators {
28
+ cpu_execution_accelerator : [ {
29
+ name : "openvino"
30
+ }]
31
+ }}
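The image_encoder and proj models above are called back-to-back by the BLS pipeline, and Triton tensor names are case-sensitive, so "image_embedding"/"image_encoder" and "clip_image_embeds"/"image_prompt_embeds" must match exactly. A minimal in-backend sketch of that chain, assuming a preprocessed CLIP image of shape [1, 3, 224, 224]:

import numpy as np
import torch
import triton_python_backend_utils as pb_utils

pixel_values = np.zeros((1, 3, 224, 224), dtype=np.float32)  # assumed CLIP preprocessing output

req = pb_utils.InferenceRequest(
    model_name="image_encoder",
    requested_output_names=["image_encoder"],
    inputs=[pb_utils.Tensor("image_embedding", pixel_values)],
)
resp = req.exec()
if resp.has_error():
    raise pb_utils.TritonModelException(resp.error().message())
clip_embeds = torch.from_dlpack(
    pb_utils.get_output_tensor_by_name(resp, "image_encoder").to_dlpack()
)  # [1, 1280], per the image_encoder config

req = pb_utils.InferenceRequest(
    model_name="proj",
    requested_output_names=["image_prompt_embeds"],
    inputs=[pb_utils.Tensor("clip_image_embeds", clip_embeds.cpu().numpy())],
)
resp = req.exec()
image_prompt_embeds = torch.from_dlpack(
    pb_utils.get_output_tensor_by_name(resp, "image_prompt_embeds").to_dlpack()
)  # [1, 4, 2048], per the proj config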
Triton/text_encoder/config.pbtxt ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "text_encoder"
2
+ platform: "onnxruntime_onnx"
3
+ max_batch_size: 0
4
+
5
+ input [
6
+ {
7
+ name: "input_ids"
8
+ data_type: TYPE_INT32
9
+ dims: [-1, -1]
10
+ }
11
+ ]
12
+
13
+ output [
14
+ {
15
+ name: "last_hidden_state"
16
+ data_type: TYPE_FP32
17
+ dims: [-1, -1, 768]
18
+ },
19
+ {
20
+ name: "pooler_output"
21
+ data_type: TYPE_FP32
22
+ dims: [-1, 768]
23
+ },
24
+ {
25
+ name: "hidden_states.0"
26
+ data_type: TYPE_FP32
27
+ dims: [-1, -1, 768]
28
+ },
29
+ {
30
+ name: "hidden_states.1"
31
+ data_type: TYPE_FP32
32
+ dims: [-1, -1, 768]
33
+ },
34
+ {
35
+ name: "hidden_states.2"
36
+ data_type: TYPE_FP32
37
+ dims: [-1, -1, 768]
38
+ },
39
+ {
40
+ name: "hidden_states.3"
41
+ data_type: TYPE_FP32
42
+ dims: [-1, -1, 768]
43
+ },
44
+ {
45
+ name: "hidden_states.4"
46
+ data_type: TYPE_FP32
47
+ dims: [-1, -1, 768]
48
+ },
49
+ {
50
+ name: "hidden_states.5"
51
+ data_type: TYPE_FP32
52
+ dims: [-1, -1, 768]
53
+ },
54
+ {
55
+ name: "hidden_states.6"
56
+ data_type: TYPE_FP32
57
+ dims: [-1, -1, 768]
58
+ },
59
+ {
60
+ name: "hidden_states.7"
61
+ data_type: TYPE_FP32
62
+ dims: [-1, -1, 768]
63
+ },
64
+ {
65
+ name: "hidden_states.8"
66
+ data_type: TYPE_FP32
67
+ dims: [-1, -1, 768]
68
+ },
69
+ {
70
+ name: "hidden_states.9"
71
+ data_type: TYPE_FP32
72
+ dims: [-1, -1, 768]
73
+ },
74
+ {
75
+ name: "hidden_states.10"
76
+ data_type: TYPE_FP32
77
+ dims: [-1, -1, 768]
78
+ },
79
+ {
80
+ name: "hidden_states.11"
81
+ data_type: TYPE_FP32
82
+ dims: [-1, -1, 768]
83
+ },
84
+ {
85
+ name: "hidden_states.12"
86
+ data_type: TYPE_FP32
87
+ dims: [-1, -1, 768]
88
+ }
89
+ ]
90
+
91
+ instance_group [
92
+ {
93
+ kind: KIND_CPU
94
+ }
95
+ ]
96
+
97
+ optimization {
98
+ execution_accelerators {
99
+ cpu_execution_accelerator: [
100
+ {
101
+ name: "openvino"
102
+ }
103
+ ]
104
+ }
105
+ }
Triton/text_encoder_2/config.pbtxt ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "text_encoder_2"
2
+ platform: "onnxruntime_onnx"
3
+ max_batch_size: 0
4
+
5
+ input [
6
+ {
7
+ name: "input_ids"
8
+ data_type: TYPE_INT64
9
+ dims: [-1, -1]
10
+ }
11
+ ]
12
+
13
+ output [
14
+ {
15
+ name: "text_embeds"
16
+ data_type: TYPE_FP32
17
+ dims: [-1, 1280]
18
+ },
19
+ {
20
+ name: "last_hidden_state"
21
+ data_type: TYPE_FP32
22
+ dims: [-1, -1, 1280]
23
+ },
24
+ {
25
+ name: "hidden_states.0"
26
+ data_type: TYPE_FP32
27
+ dims: [-1, -1, 1280]
28
+ },
29
+ {
30
+ name: "hidden_states.1"
31
+ data_type: TYPE_FP32
32
+ dims: [-1, -1, 1280]
33
+ },
34
+ {
35
+ name: "hidden_states.2"
36
+ data_type: TYPE_FP32
37
+ dims: [-1, -1, 1280]
38
+ },
39
+ {
40
+ name: "hidden_states.3"
41
+ data_type: TYPE_FP32
42
+ dims: [-1, -1, 1280]
43
+ },
44
+ {
45
+ name: "hidden_states.4"
46
+ data_type: TYPE_FP32
47
+ dims: [-1, -1, 1280]
48
+ },
49
+ {
50
+ name: "hidden_states.5"
51
+ data_type: TYPE_FP32
52
+ dims: [-1, -1, 1280]
53
+ },
54
+ {
55
+ name: "hidden_states.6"
56
+ data_type: TYPE_FP32
57
+ dims: [-1, -1, 1280]
58
+ },
59
+ {
60
+ name: "hidden_states.7"
61
+ data_type: TYPE_FP32
62
+ dims: [-1, -1, 1280]
63
+ },
64
+ {
65
+ name: "hidden_states.8"
66
+ data_type: TYPE_FP32
67
+ dims: [-1, -1, 1280]
68
+ },
69
+ {
70
+ name: "hidden_states.9"
71
+ data_type: TYPE_FP32
72
+ dims: [-1, -1, 1280]
73
+ },
74
+ {
75
+ name: "hidden_states.10"
76
+ data_type: TYPE_FP32
77
+ dims: [-1, -1, 1280]
78
+ },
79
+ {
80
+ name: "hidden_states.11"
81
+ data_type: TYPE_FP32
82
+ dims: [-1, -1, 1280]
83
+ },
84
+ {
85
+ name: "hidden_states.12"
86
+ data_type: TYPE_FP32
87
+ dims: [-1, -1, 1280]
88
+ },
89
+ {
90
+ name: "hidden_states.13"
91
+ data_type: TYPE_FP32
92
+ dims: [-1, -1, 1280]
93
+ },
94
+ {
95
+ name: "hidden_states.14"
96
+ data_type: TYPE_FP32
97
+ dims: [-1, -1, 1280]
98
+ },
99
+ {
100
+ name: "hidden_states.15"
101
+ data_type: TYPE_FP32
102
+ dims: [-1, -1, 1280]
103
+ },
104
+ {
105
+ name: "hidden_states.16"
106
+ data_type: TYPE_FP32
107
+ dims: [-1, -1, 1280]
108
+ },
109
+ {
110
+ name: "hidden_states.17"
111
+ data_type: TYPE_FP32
112
+ dims: [-1, -1, 1280]
113
+ },
114
+ {
115
+ name: "hidden_states.18"
116
+ data_type: TYPE_FP32
117
+ dims: [-1, -1, 1280]
118
+ },
119
+ {
120
+ name: "hidden_states.19"
121
+ data_type: TYPE_FP32
122
+ dims: [-1, -1, 1280]
123
+ },
124
+ {
125
+ name: "hidden_states.20"
126
+ data_type: TYPE_FP32
127
+ dims: [-1, -1, 1280]
128
+ },
129
+ {
130
+ name: "hidden_states.21"
131
+ data_type: TYPE_FP32
132
+ dims: [-1, -1, 1280]
133
+ },
134
+ {
135
+ name: "hidden_states.22"
136
+ data_type: TYPE_FP32
137
+ dims: [-1, -1, 1280]
138
+ },
139
+ {
140
+ name: "hidden_states.23"
141
+ data_type: TYPE_FP32
142
+ dims: [-1, -1, 1280]
143
+ },
144
+ {
145
+ name: "hidden_states.24"
146
+ data_type: TYPE_FP32
147
+ dims: [-1, -1, 1280]
148
+ },
149
+ {
150
+ name: "hidden_states.25"
151
+ data_type: TYPE_FP32
152
+ dims: [-1, -1, 1280]
153
+ },
154
+ {
155
+ name: "hidden_states.26"
156
+ data_type: TYPE_FP32
157
+ dims: [-1, -1, 1280]
158
+ },
159
+ {
160
+ name: "hidden_states.27"
161
+ data_type: TYPE_FP32
162
+ dims: [-1, -1, 1280]
163
+ },
164
+ {
165
+ name: "hidden_states.28"
166
+ data_type: TYPE_FP32
167
+ dims: [-1, -1, 1280]
168
+ },
169
+ {
170
+ name: "hidden_states.29"
171
+ data_type: TYPE_FP32
172
+ dims: [-1, -1, 1280]
173
+ },
174
+ {
175
+ name: "hidden_states.30"
176
+ data_type: TYPE_FP32
177
+ dims: [-1, -1, 1280]
178
+ },
179
+ {
180
+ name: "hidden_states.31"
181
+ data_type: TYPE_FP32
182
+ dims: [-1, -1, 1280]
183
+ },
184
+ {
185
+ name: "hidden_states.32"
186
+ data_type: TYPE_FP32
187
+ dims: [-1, -1, 1280]
188
+ }
189
+ ]
190
+
191
+ instance_group [
192
+ {
193
+ kind: KIND_CPU
194
+ }
195
+ ]
196
+
197
+ optimization {
198
+ execution_accelerators {
199
+ cpu_execution_accelerator: [
200
+ {
201
+ name: "openvino"
202
+ }
203
+ ]
204
+ }
205
+ }
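The two text encoders expose every hidden state so the caller can pick layers. A hedged sketch of the usual SDXL convention (penultimate hidden state from each encoder, concatenated to the 2048-dim encoder_hidden_state the unet expects, with text_embeds as the pooled embedding); the exact layer choice for this pipeline is made in the BLS model.py above. The te1_outputs/te2_outputs dicts below are hypothetical stand-ins for the Triton responses:

import numpy as np

te1_outputs = {"hidden_states.11": np.zeros((1, 77, 768), dtype=np.float32)}
te2_outputs = {
    "hidden_states.31": np.zeros((1, 77, 1280), dtype=np.float32),
    "text_embeds": np.zeros((1, 1280), dtype=np.float32),
}

# 768 + 1280 = 2048, matching cross_attention_dim in the unet config.
encoder_hidden_state = np.concatenate(
    [te1_outputs["hidden_states.11"], te2_outputs["hidden_states.31"]], axis=-1
)
pooled_prompt_embeds = te2_outputs["text_embeds"]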
Triton/tokenizer/1/__pycache__/model.cpython-310.pyc ADDED
Binary file (2.32 kB). View file
 
Triton/tokenizer/1/config/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
Triton/tokenizer/1/config/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
Triton/tokenizer/1/config/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "49406": {
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49407": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ }
20
+ },
21
+ "bos_token": "<|startoftext|>",
22
+ "clean_up_tokenization_spaces": true,
23
+ "do_lower_case": true,
24
+ "eos_token": "<|endoftext|>",
25
+ "errors": "replace",
26
+ "model_max_length": 77,
27
+ "pad_token": "<|endoftext|>",
28
+ "tokenizer_class": "CLIPTokenizer",
29
+ "unk_token": "<|endoftext|>"
30
+ }
Triton/tokenizer/1/config/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
Triton/tokenizer/1/model.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import os
4
+ from typing import Dict, List
5
+ from transformers import CLIPTokenizer
6
+ from pathlib import Path
7
+ from typing import Callable, List, Optional, Union, Dict
8
+ import numpy as np
9
+ import triton_python_backend_utils as pb_utils
10
+ from transformers import AutoTokenizer, PreTrainedTokenizer, TensorType
11
+
12
+
13
+ class TritonPythonModel:
14
+ def initialize(self, args: Dict[str, str]):
15
+ current_name: str = str(Path(args["model_repository"]).parent.absolute())
16
+
17
+ self.tokenizer = CLIPTokenizer.from_pretrained(
18
+ current_name + "/tokenizer/1/config/"
19
+ )
20
+
21
+ def execute(self, requests) -> "List[List[pb_utils.Tensor]]":
22
+ """
23
+ Parse and tokenize each request
24
+ :param requests: 1 or more requests received by Triton server.
25
+ :return: text as input tensors
26
+ """
27
+ responses = []
28
+ # TODO: update to process batch requests
29
+ for request in requests:
30
+ # binary data typed back to string
31
+ query = [
32
+ t.decode("UTF-8")
33
+ for t in pb_utils.get_input_tensor_by_name(request, "TEXT")
34
+ .as_numpy()
35
+ .tolist()
36
+ ]
37
+
38
+ # tokenization
39
+ text_input_ids = self.tokenizer(
40
+ query,
41
+ padding="max_length",
42
+ max_length=self.tokenizer.model_max_length,
43
+ truncation=True,
44
+ return_tensors="np",
45
+ ).input_ids.astype(np.int32)
46
+
47
+ # raw tokenization without truncation
48
+ untruncated_ids = self.tokenizer(
49
+ query,
50
+ padding="max_length",
51
+ return_tensors="np"
52
+ ).input_ids.astype(np.int32)
53
+
54
+ # only for logging
55
+ if not np.array_equal(text_input_ids, untruncated_ids):
56
+ removed_text = self.tokenizer.batch_decode(
57
+ untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
58
+ )
59
+ self.logger.log_warn(
60
+ "The following part of your input was truncated because CLIP can only handle sequences up to"
61
+ f" {self.tokenizer.model_max_length} tokens: {removed_text}"
62
+ )
63
+
64
+ # communicate the tokenization results to Triton server
65
+ tensor_output = pb_utils.Tensor('input_ids', text_input_ids)
66
+ inference_response = pb_utils.InferenceResponse(output_tensors=[tensor_output])
67
+ responses.append(inference_response)
68
+
69
+ return responses
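A hedged sketch of how another Python model (e.g. the BLS pipeline) can call this tokenizer. Note that the config.pbtxt below also declares an attention_mask output, which this model.py does not currently produce, so only input_ids should be requested:

import numpy as np
import triton_python_backend_utils as pb_utils

prompts = np.array(["1girl, masterpiece".encode("utf-8")], dtype=np.object_)

req = pb_utils.InferenceRequest(
    model_name="tokenizer",
    requested_output_names=["input_ids"],
    inputs=[pb_utils.Tensor("TEXT", prompts)],
)
resp = req.exec()
if resp.has_error():
    raise pb_utils.TritonModelException(resp.error().message())
input_ids = pb_utils.get_output_tensor_by_name(resp, "input_ids").as_numpy()  # [1, 77] int32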
Triton/tokenizer/config.pbtxt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "tokenizer"
2
+ max_batch_size: 0
3
+ backend: "python"
4
+
5
+ input [
6
+ {
7
+ name: "TEXT"
8
+ data_type: TYPE_STRING
9
+ dims: [ -1 ]
10
+ }
11
+ ]
12
+
13
+ output [
14
+ {
15
+ name: "input_ids"
16
+ data_type: TYPE_INT32
17
+ dims: [-1, -1]
18
+ },
19
+ {
20
+ name: "attention_mask"
21
+ data_type: TYPE_INT32
22
+ dims: [-1, -1]
23
+ }
24
+ ]
25
+
26
+ instance_group [
27
+ {
28
+ kind: KIND_CPU
29
+ }
30
+ ]
Triton/tokenizer_2/1/__pycache__/model.cpython-310.pyc ADDED
Binary file (2.32 kB). View file
 
Triton/tokenizer_2/1/config/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
Triton/tokenizer_2/1/config/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "!",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
Triton/tokenizer_2/1/config/tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "!",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49406": {
13
+ "content": "<|startoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "49407": {
21
+ "content": "<|endoftext|>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "bos_token": "<|startoftext|>",
30
+ "clean_up_tokenization_spaces": true,
31
+ "do_lower_case": true,
32
+ "eos_token": "<|endoftext|>",
33
+ "errors": "replace",
34
+ "model_max_length": 77,
35
+ "pad_token": "!",
36
+ "tokenizer_class": "CLIPTokenizer",
37
+ "unk_token": "<|endoftext|>"
38
+ }
Triton/tokenizer_2/1/config/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
Triton/tokenizer_2/1/model.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import os
4
+ from typing import Dict, List
5
+ from transformers import CLIPTokenizer
6
+ from pathlib import Path
7
+ from typing import Callable, List, Optional, Union, Dict
8
+ import numpy as np
9
+ import triton_python_backend_utils as pb_utils
10
+ from transformers import AutoTokenizer, PreTrainedTokenizer, TensorType
11
+
12
+
13
+ class TritonPythonModel:
14
+ def initialize(self, args: Dict[str, str]):
15
+ current_name: str = str(Path(args["model_repository"]).parent.absolute())
16
+
17
+ self.tokenizer = CLIPTokenizer.from_pretrained(
18
+ current_name + "/tokenizer_2/1/config/"
19
+ )
20
+
21
+ def execute(self, requests) -> "List[List[pb_utils.Tensor]]":
22
+ """
23
+ Parse and tokenize each request
24
+ :param requests: 1 or more requests received by Triton server.
25
+ :return: text as input tensors
26
+ """
27
+ responses = []
28
+ # TODO: update to process batch requests
29
+ for request in requests:
30
+ # binary data typed back to string
31
+ query = [
32
+ t.decode("UTF-8")
33
+ for t in pb_utils.get_input_tensor_by_name(request, "TEXT")
34
+ .as_numpy()
35
+ .tolist()
36
+ ]
37
+
38
+ # tokenization
39
+ text_input_ids = self.tokenizer(
40
+ query,
41
+ padding="max_length",
42
+ max_length=self.tokenizer.model_max_length,
43
+ truncation=True,
44
+ return_tensors="np",
45
+ ).input_ids.astype(np.int32)
46
+
47
+ # raw tokenization without truncation
48
+ untruncated_ids = self.tokenizer(
49
+ query,
50
+ padding="max_length",
51
+ return_tensors="np"
52
+ ).input_ids.astype(np.int32)
53
+
54
+ # only for logging
55
+ if not np.array_equal(text_input_ids, untruncated_ids):
56
+ removed_text = self.tokenizer.batch_decode(
57
+ untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
58
+ )
59
+ self.logger.log_warn(
60
+ "The following part of your input was truncated because CLIP can only handle sequences up to"
61
+ f" {self.tokenizer.model_max_length} tokens: {removed_text}"
62
+ )
63
+
64
+ # communicate the tokenization results to Triton server
65
+ tensor_output = pb_utils.Tensor('input_ids', text_input_ids)
66
+ inference_response = pb_utils.InferenceResponse(output_tensors=[tensor_output])
67
+ responses.append(inference_response)
68
+
69
+ return responses
Triton/tokenizer_2/config.pbtxt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "tokenizer_2"
2
+ max_batch_size: 0
3
+ backend: "python"
4
+
5
+ input [
6
+ {
7
+ name: "TEXT"
8
+ data_type: TYPE_STRING
9
+ dims: [ -1 ]
10
+ }
11
+ ]
12
+
13
+ output [
14
+ {
15
+ name: "input_ids"
16
+ data_type: TYPE_INT32
17
+ dims: [-1, -1]
18
+ },
19
+ {
20
+ name: "attention_mask"
21
+ data_type: TYPE_INT32
22
+ dims: [-1, -1]
23
+ }
24
+ ]
25
+
26
+ instance_group [
27
+ {
28
+ kind: KIND_CPU
29
+ }
30
+ ]
Triton/unet/config.pbtxt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "unet"
2
+ backend: "onnxruntime"
3
+ max_batch_size: 0
4
+
5
+ input [
6
+ {
7
+ name: "control_out"
8
+ data_type: TYPE_FP32
9
+ dims: [-1, 320, 128, 128] # Dynamic batch size
10
+ },
11
+ {
12
+ name: "encoder_hidden_state"
13
+ data_type: TYPE_FP32
14
+ dims: [-1, -1, -1] # Dynamic batch size
15
+ },
16
+ {
17
+ name: "sample"
18
+ data_type: TYPE_FP32
19
+ dims: [-1, 4, 128, 128] # Dynamic batch size
20
+ },
21
+ {
22
+ name: "timestep"
23
+ data_type: TYPE_FP32
24
+ dims: [1] # Dynamic batch size
25
+ },
26
+ {
27
+ name: "control_scale"
28
+ data_type: TYPE_FP32
29
+ dims: [1] # Dynamic batch size
30
+ }
31
+ ]
32
+ output [
33
+ {
34
+ name: "predict_noise"
35
+ data_type: TYPE_FP32
36
+ dims: [-1, 4, 128, 128] # Dynamic batch size
37
+ }
38
+ ]
39
+ instance_group [
40
+ {
41
+ kind: KIND_CPU
42
+ }
43
+ ]
44
+
45
+ optimization { execution_accelerators {
46
+ cpu_execution_accelerator : [ {
47
+ name : "openvino"
48
+ }]
49
+ }}
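This unet graph was exported with 128x128 latent spatial dimensions (i.e. 1024x1024 images). A small sanity-check sketch of the tensors the BLS denoising loop feeds it, assuming classifier-free guidance (batch of 2) and 77-token prompts; actual shapes depend on the export:

import numpy as np

sample = np.zeros((2, 4, 128, 128), dtype=np.float32)          # latent_model_input
timestep = np.array([999.0], dtype=np.float32)
encoder_hidden_state = np.zeros((2, 77, 2048), dtype=np.float32)
control_out = np.zeros((2, 320, 128, 128), dtype=np.float32)   # ControlNeXt features
control_scale = np.array([1.0], dtype=np.float32)

assert sample.shape[1:] == (4, 128, 128)
assert control_out.shape[1:] == (320, 128, 128)
assert encoder_hidden_state.shape[-1] == 2048                  # matches cross_attention_dim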
Triton/vae_decoder/config.pbtxt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "vae_decoder"
2
+ backend: "onnxruntime"
3
+ max_batch_size: 0
4
+
5
+ input [
6
+ {
7
+ name: "latent_sample"
8
+ data_type: TYPE_FP32
9
+ dims: [-1, 4, -1, -1] # Dynamic batch size
10
+ }
11
+ ]
12
+ output [
13
+ {
14
+ name: "sample"
15
+ data_type: TYPE_FP32
16
+ dims: [-1, 3, -1, -1] # Dynamic batch size
17
+ }
18
+ ]
19
+ instance_group [
20
+ {
21
+ kind: KIND_CPU
22
+ }
23
+ ]
24
+
25
+ optimization { execution_accelerators {
26
+ cpu_execution_accelerator : [ {
27
+ name : "openvino"
28
+ }]
29
+ }}
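The vae_decoder "sample" output (and hence the IMAGES tensor returned by the BLS model) is in the VAE's [-1, 1] range. A hedged postprocessing sketch, mirroring the usual diffusers image-processor step rather than anything defined in this repository, to turn it into viewable images:

import numpy as np
from PIL import Image

def to_pil(images: np.ndarray):
    # images: [batch, 3, H, W] float32 in [-1, 1]
    images = (images / 2 + 0.5).clip(0, 1)
    images = (images * 255).round().astype(np.uint8)
    images = images.transpose(0, 2, 3, 1)  # NCHW -> NHWC
    return [Image.fromarray(img) for img in images]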