rupeshs committed on
Commit
6dd62e4
·
1 Parent(s): 3469d37

updated files

Browse files
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Fastsdcpu
3
- emoji: 📉
4
- colorFrom: blue
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 3.50.2
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/__pycache__/device.cpython-311.pyc ADDED
Binary file (1.57 kB). View file
 
backend/__pycache__/lcm_models.cpython-311.pyc ADDED
Binary file (542 Bytes). View file
 
backend/device.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import platform
2
+ from constants import DEVICE
3
+ import torch
4
+ import openvino as ov
5
+
6
+ core = ov.Core()
7
+
8
+
9
def is_openvino_device() -> bool:
    """Return True when the configured ``DEVICE`` is handled through OpenVINO.

    ``DEVICE`` counts as an OpenVINO target when, compared case-insensitively,
    it equals ``"cpu"`` or its name starts with ``"g"``.
    NOTE(review): the ``"g"`` prefix presumably matches OpenVINO GPU device
    names such as "GPU" / "GPU.0" -- confirm against ``constants.DEVICE``.

    An empty ``DEVICE`` string yields ``False`` instead of raising
    ``IndexError`` from indexing the first character.
    """
    device = DEVICE.lower()
    return device == "cpu" or device.startswith("g")
14
+
15
+
16
def get_device_name() -> str:
    """Return a human-readable name for the configured compute device.

    Resolution order:
      1. ``DEVICE == "cuda"``: name of the current CUDA device, from torch.
      2. ``DEVICE == "mps"`` or running on macOS: ``platform.processor()``.
         The CUDA query API is not used for "mps" because it is only valid
         on CUDA builds of torch.
      3. OpenVINO devices: the ``FULL_DEVICE_NAME`` property reported by the
         OpenVINO core for ``DEVICE.upper()``.
      4. Otherwise: ``platform.processor()``, or ``DEVICE`` itself when the
         platform reports an empty processor string, so the function always
         returns a ``str`` as annotated.
    """
    if DEVICE == "cuda":
        return torch.cuda.get_device_name(torch.cuda.current_device())
    if DEVICE == "mps" or platform.system().lower() == "darwin":
        return platform.processor()
    if is_openvino_device():
        return core.get_property(DEVICE.upper(), "FULL_DEVICE_NAME")
    # No branch matched: fall back to something printable rather than None.
    return platform.processor() or DEVICE
backend/lcm_models.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from constants import LCM_DEFAULT_MODEL
3
+
4
+
5
def get_available_models() -> List:
    """Return the selectable LCM model identifiers, default model first."""
    return [
        LCM_DEFAULT_MODEL,
        "latent-consistency/lcm-sdxl",
        "latent-consistency/lcm-ssd-1b",
    ]
backend/lcmdiffusion/pipelines/latent_consistency_txt2img.py DELETED
@@ -1,730 +0,0 @@
1
- # Copyright 2023 Stanford University Team and The HuggingFace Team. All rights reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- # DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion
16
- # and https://github.com/hojonathanho/diffusion
17
-
18
- import math
19
- from dataclasses import dataclass
20
- from typing import Any, Dict, List, Optional, Tuple, Union
21
-
22
- import numpy as np
23
- import torch
24
- from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
25
-
26
- from diffusers import AutoencoderKL, ConfigMixin, DiffusionPipeline, SchedulerMixin, UNet2DConditionModel, logging
27
- from diffusers.configuration_utils import register_to_config
28
- from diffusers.image_processor import VaeImageProcessor
29
- from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
30
- from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
31
- from diffusers.utils import BaseOutput
32
-
33
-
34
- logger = logging.get_logger(__name__) # pylint: disable=invalid-name
35
-
36
-
37
class LatentConsistencyModelPipeline(DiffusionPipeline):
    """Text-to-image pipeline for Latent Consistency Models (LCM).

    The guidance scale is not applied as classifier-free guidance; it is
    embedded with `get_w_embedding` and passed to the UNet as `timestep_cond`,
    so no unconditional prompt embedding is computed.
    """

    _optional_components = ["scheduler"]

    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: CLIPTextModel,
        tokenizer: CLIPTokenizer,
        unet: UNet2DConditionModel,
        scheduler: "LCMScheduler",
        safety_checker: StableDiffusionSafetyChecker,
        feature_extractor: CLIPImageProcessor,
        requires_safety_checker: bool = True,
    ):
        super().__init__()

        # Fall back to an LCMScheduler with fixed defaults when none is supplied.
        scheduler = (
            scheduler
            if scheduler is not None
            else LCMScheduler(
                beta_start=0.00085, beta_end=0.0120, beta_schedule="scaled_linear", prediction_type="epsilon"
            )
        )

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            scheduler=scheduler,
            safety_checker=safety_checker,
            feature_extractor=feature_extractor,
        )
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

    def _encode_prompt(
        self,
        prompt,
        device,
        num_images_per_prompt,
        prompt_embeds: Optional[torch.FloatTensor] = None,
    ):
        r"""
        Encodes the prompt into text encoder hidden states.
        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            device: (`torch.device`):
                torch device
            num_images_per_prompt (`int`):
                number of images that should be generated per prompt
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
        Raises:
            ValueError: if neither `prompt` nor `prompt_embeds` is provided.
        """
        if prompt is None and prompt_embeds is None:
            raise ValueError("Provide either `prompt` or `prompt_embeds`.")

        if prompt_embeds is None:
            text_inputs = self.tokenizer(
                prompt,
                padding="max_length",
                max_length=self.tokenizer.model_max_length,
                truncation=True,
                return_tensors="pt",
            )
            text_input_ids = text_inputs.input_ids
            untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

            # Warn when the untruncated tokenization differs, i.e. text was cut off.
            if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
                text_input_ids, untruncated_ids
            ):
                removed_text = self.tokenizer.batch_decode(
                    untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
                )
                logger.warning(
                    "The following part of your input was truncated because CLIP can only handle sequences up to"
                    f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                )

            if hasattr(self.text_encoder.config, "use_attention_mask") and self.text_encoder.config.use_attention_mask:
                attention_mask = text_inputs.attention_mask.to(device)
            else:
                attention_mask = None

            prompt_embeds = self.text_encoder(
                text_input_ids.to(device),
                attention_mask=attention_mask,
            )
            prompt_embeds = prompt_embeds[0]

        # Cast to the dtype of the first available model component.
        if self.text_encoder is not None:
            prompt_embeds_dtype = self.text_encoder.dtype
        elif self.unet is not None:
            prompt_embeds_dtype = self.unet.dtype
        else:
            prompt_embeds_dtype = prompt_embeds.dtype

        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)

        bs_embed, seq_len, _ = prompt_embeds.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
        prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)

        # Don't need to get uncond prompt embedding because of LCM Guided Distillation
        return prompt_embeds

    def run_safety_checker(self, image, device, dtype):
        """Run the safety checker on `image` when one is configured.

        Returns:
            `(image, has_nsfw_concept)`; `has_nsfw_concept` is `None` when no safety checker is set, in which case
            `image` is returned unchanged.
        """
        if self.safety_checker is None:
            has_nsfw_concept = None
        else:
            if torch.is_tensor(image):
                feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
            else:
                feature_extractor_input = self.image_processor.numpy_to_pil(image)
            safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
            image, has_nsfw_concept = self.safety_checker(
                images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
            )
        return image, has_nsfw_concept

    def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, latents=None):
        """Create (or move to `device`) the initial latents and scale them by the scheduler's `init_noise_sigma`.

        When `latents` is `None`, standard-normal noise of shape
        `(batch_size, num_channels_latents, height // vae_scale_factor, width // vae_scale_factor)` is sampled.
        """
        shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
        if latents is None:
            latents = torch.randn(shape, dtype=dtype).to(device)
        else:
            latents = latents.to(device)
        # scale the initial noise by the standard deviation required by the scheduler
        latents = latents * self.scheduler.init_noise_sigma
        return latents

    def get_w_embedding(self, w, embedding_dim=512, dtype=torch.float32):
        """
        see https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
        Args:
            w: torch.Tensor: 1-D tensor of guidance scale values to embed
            embedding_dim: int: dimension of the embeddings to generate
            dtype: data type of the generated embeddings
        Returns:
            embedding vectors with shape `(len(w), embedding_dim)`
        """
        assert len(w.shape) == 1
        w = w * 1000.0

        half_dim = embedding_dim // 2
        emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
        emb = w.to(dtype)[:, None] * emb[None, :]
        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
        if embedding_dim % 2 == 1:  # zero pad
            emb = torch.nn.functional.pad(emb, (0, 1))
        assert emb.shape == (w.shape[0], embedding_dim)
        return emb

    @torch.no_grad()
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
        height: Optional[int] = 768,
        width: Optional[int] = 768,
        guidance_scale: float = 7.5,
        num_images_per_prompt: Optional[int] = 1,
        latents: Optional[torch.FloatTensor] = None,
        num_inference_steps: int = 4,
        lcm_origin_steps: int = 50,
        prompt_embeds: Optional[torch.FloatTensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
    ):
        """Generate images from a prompt (or pre-computed prompt embeddings).

        Args:
            prompt: a string or list of strings; may be omitted when `prompt_embeds` is given.
            height, width: output size in pixels; a falsy value falls back to
                `unet.config.sample_size * vae_scale_factor`.
            guidance_scale: value embedded via `get_w_embedding` and passed to the UNet as `timestep_cond`.
            num_images_per_prompt: number of images generated per prompt.
            latents: optional initial latents; sampled randomly when `None`.
            num_inference_steps, lcm_origin_steps: forwarded to `scheduler.set_timesteps`.
            prompt_embeds: optional pre-computed text embeddings.
            output_type: `"latent"` skips VAE decoding and the safety checker; otherwise forwarded to the image
                processor's `postprocess`.
            return_dict: return a `StableDiffusionPipelineOutput` when `True`, else `(image, has_nsfw_concept)`.
            cross_attention_kwargs: forwarded to the UNet.
        Raises:
            ValueError: if `prompt` is neither a string nor a list and `prompt_embeds` is not provided.
        """
        # 0. Default height and width to unet
        height = height or self.unet.config.sample_size * self.vae_scale_factor
        width = width or self.unet.config.sample_size * self.vae_scale_factor

        # 2. Define call parameters
        if isinstance(prompt, str):
            batch_size = 1
        elif isinstance(prompt, list):
            batch_size = len(prompt)
        elif prompt_embeds is not None:
            batch_size = prompt_embeds.shape[0]
        else:
            raise ValueError("Provide either `prompt` (a string or list of strings) or `prompt_embeds`.")

        device = self._execution_device

        # 3. Encode input prompt
        prompt_embeds = self._encode_prompt(
            prompt,
            device,
            num_images_per_prompt,
            prompt_embeds=prompt_embeds,
        )

        # 4. Prepare timesteps
        self.scheduler.set_timesteps(num_inference_steps, lcm_origin_steps)
        timesteps = self.scheduler.timesteps

        # 5. Prepare latent variable
        num_channels_latents = self.unet.config.in_channels
        latents = self.prepare_latents(
            batch_size * num_images_per_prompt,
            num_channels_latents,
            height,
            width,
            prompt_embeds.dtype,
            device,
            latents,
        )
        bs = batch_size * num_images_per_prompt

        # 6. Get Guidance Scale Embedding
        w = torch.tensor(guidance_scale).repeat(bs)
        w_embedding = self.get_w_embedding(w, embedding_dim=256).to(device=device, dtype=latents.dtype)

        # 7. LCM MultiStep Sampling Loop:
        with self.progress_bar(total=num_inference_steps) as progress_bar:
            for i, t in enumerate(timesteps):
                ts = torch.full((bs,), t, device=device, dtype=torch.long)
                latents = latents.to(prompt_embeds.dtype)

                # model prediction (v-prediction, eps, x)
                model_pred = self.unet(
                    latents,
                    ts,
                    timestep_cond=w_embedding,
                    encoder_hidden_states=prompt_embeds,
                    cross_attention_kwargs=cross_attention_kwargs,
                    return_dict=False,
                )[0]

                # compute the previous noisy sample x_t -> x_t-1
                latents, denoised = self.scheduler.step(model_pred, i, t, latents, return_dict=False)

                progress_bar.update()

        denoised = denoised.to(prompt_embeds.dtype)
        if not output_type == "latent":
            image = self.vae.decode(denoised / self.vae.config.scaling_factor, return_dict=False)[0]
            image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
        else:
            image = denoised
            has_nsfw_concept = None

        # Images flagged by the safety checker are not denormalized.
        if has_nsfw_concept is None:
            do_denormalize = [True] * image.shape[0]
        else:
            do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]

        image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

        if not return_dict:
            return (image, has_nsfw_concept)

        return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
300
-
301
-
302
@dataclass
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM
class LCMSchedulerOutput(BaseOutput):
    """
    Output class for the scheduler's `step` function output.
    Args:
        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
        denoised (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images, *optional*):
            The denoised sample the scheduler's `step` computes from the model output at the current timestep
            (after applying the boundary-condition scalings). Defaults to `None`.
    """

    prev_sample: torch.FloatTensor
    denoised: Optional[torch.FloatTensor] = None
318
-
319
-
320
- # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
321
def betas_for_alpha_bar(
    num_diffusion_timesteps,
    max_beta=0.999,
    alpha_transform_type="cosine",
):
    """
    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
    (1-beta) over time from t = [0,1].
    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
    to that part of the diffusion process.
    Args:
        num_diffusion_timesteps (`int`): the number of betas to produce.
        max_beta (`float`): the maximum beta to use; use values lower than 1 to
                     prevent singularities.
        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
                     Choose from `cosine` or `exp`
    Returns:
        betas (`torch.Tensor`, dtype `float32`): the betas used by the scheduler to step the model outputs
    Raises:
        ValueError: if `alpha_transform_type` is neither `cosine` nor `exp`.
    """
    if alpha_transform_type == "cosine":

        def alpha_bar_fn(t):
            return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2

    elif alpha_transform_type == "exp":

        def alpha_bar_fn(t):
            return math.exp(t * -12.0)

    else:
        raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")

    # beta_i = 1 - alpha_bar(t_{i+1}) / alpha_bar(t_i), capped at max_beta.
    betas = [
        min(
            1 - alpha_bar_fn((i + 1) / num_diffusion_timesteps) / alpha_bar_fn(i / num_diffusion_timesteps),
            max_beta,
        )
        for i in range(num_diffusion_timesteps)
    ]
    return torch.tensor(betas, dtype=torch.float32)
359
-
360
-
361
def rescale_zero_terminal_snr(betas):
    """
    Rescale a beta schedule so that the terminal SNR is zero, following
    https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1).
    Args:
        betas (`torch.FloatTensor`):
            the betas that the scheduler is being initialized with.
    Returns:
        `torch.FloatTensor`: rescaled betas with zero terminal SNR
    """
    # betas -> sqrt of the cumulative alpha product
    sqrt_alpha_bar = torch.cumprod(1.0 - betas, dim=0).sqrt()
    first_value = sqrt_alpha_bar[0]
    last_value = sqrt_alpha_bar[-1]

    # Shift so the final entry becomes zero, then rescale so the first entry keeps its old value.
    shifted = sqrt_alpha_bar - last_value
    rescaled = shifted * (first_value / (first_value - last_value))

    # Undo the sqrt and the cumulative product to get back to per-step alphas.
    alpha_bar = rescaled**2
    step_alphas = torch.cat([alpha_bar[0:1], alpha_bar[1:] / alpha_bar[:-1]])

    return 1 - step_alphas
392
-
393
-
394
- class LCMScheduler(SchedulerMixin, ConfigMixin):
395
- """
396
- `LCMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with
397
- non-Markovian guidance.
398
- This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
399
- methods the library implements for all schedulers such as loading and saving.
400
- Args:
401
- num_train_timesteps (`int`, defaults to 1000):
402
- The number of diffusion steps to train the model.
403
- beta_start (`float`, defaults to 0.0001):
404
- The starting `beta` value of inference.
405
- beta_end (`float`, defaults to 0.02):
406
- The final `beta` value.
407
- beta_schedule (`str`, defaults to `"linear"`):
408
- The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
409
- `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
410
- trained_betas (`np.ndarray`, *optional*):
411
- Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
412
- clip_sample (`bool`, defaults to `True`):
413
- Clip the predicted sample for numerical stability.
414
- clip_sample_range (`float`, defaults to 1.0):
415
- The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
416
- set_alpha_to_one (`bool`, defaults to `True`):
417
- Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
418
- there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
419
- otherwise it uses the alpha value at step 0.
420
- steps_offset (`int`, defaults to 0):
421
- An offset added to the inference steps. You can use a combination of `offset=1` and
422
- `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable
423
- Diffusion.
424
- prediction_type (`str`, defaults to `epsilon`, *optional*):
425
- Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
426
- `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
427
- Video](https://imagen.research.google/video/paper.pdf) paper).
428
- thresholding (`bool`, defaults to `False`):
429
- Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
430
- as Stable Diffusion.
431
- dynamic_thresholding_ratio (`float`, defaults to 0.995):
432
- The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
433
- sample_max_value (`float`, defaults to 1.0):
434
- The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
435
- timestep_spacing (`str`, defaults to `"leading"`):
436
- The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
437
- Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
438
- rescale_betas_zero_snr (`bool`, defaults to `False`):
439
- Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
440
- dark samples instead of limiting it to samples with medium brightness. Loosely related to
441
- [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
442
- """
443
-
444
- # _compatibles = [e.name for e in KarrasDiffusionSchedulers]
445
- order = 1
446
-
447
@register_to_config
def __init__(
    self,
    num_train_timesteps: int = 1000,
    beta_start: float = 0.0001,
    beta_end: float = 0.02,
    beta_schedule: str = "linear",
    trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
    clip_sample: bool = True,
    set_alpha_to_one: bool = True,
    steps_offset: int = 0,
    prediction_type: str = "epsilon",
    thresholding: bool = False,
    dynamic_thresholding_ratio: float = 0.995,
    clip_sample_range: float = 1.0,
    sample_max_value: float = 1.0,
    timestep_spacing: str = "leading",
    rescale_betas_zero_snr: bool = False,
):
    """Build the beta/alpha tables and the default (full-length, descending) timestep list.

    Explicit `trained_betas` take precedence over `beta_schedule`. An unknown `beta_schedule` raises
    `NotImplementedError`.
    """
    if trained_betas is not None:
        betas = torch.tensor(trained_betas, dtype=torch.float32)
    elif beta_schedule == "linear":
        betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
    elif beta_schedule == "scaled_linear":
        # this schedule is very specific to the latent diffusion model.
        sqrt_betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32)
        betas = sqrt_betas**2
    elif beta_schedule == "squaredcos_cap_v2":
        # Glide cosine schedule
        betas = betas_for_alpha_bar(num_train_timesteps)
    else:
        raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")

    # Optional rescale for zero terminal SNR
    if rescale_betas_zero_snr:
        betas = rescale_zero_terminal_snr(betas)

    self.betas = betas
    self.alphas = 1.0 - self.betas
    self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)

    # The final step has no "previous" cumulative alpha; `set_alpha_to_one` selects between
    # a fixed 1.0 and the cumulative alpha at step 0 as its stand-in.
    if set_alpha_to_one:
        self.final_alpha_cumprod = torch.tensor(1.0)
    else:
        self.final_alpha_cumprod = self.alphas_cumprod[0]

    # standard deviation of the initial noise distribution
    self.init_noise_sigma = 1.0

    # values replaced later by `set_timesteps`
    self.num_inference_steps = None
    descending_steps = np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)
    self.timesteps = torch.from_numpy(descending_steps)
500
-
501
def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
    """
    Identity hook kept so this scheduler is interchangeable with schedulers that must scale the denoising
    model input depending on the current timestep.
    Args:
        sample (`torch.FloatTensor`):
            The input sample.
        timestep (`int`, *optional*):
            The current timestep in the diffusion chain (ignored here).
    Returns:
        `torch.FloatTensor`:
            The input `sample`, returned unchanged.
    """
    return sample
515
-
516
- def _get_variance(self, timestep, prev_timestep):
517
- alpha_prod_t = self.alphas_cumprod[timestep]
518
- alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
519
- beta_prod_t = 1 - alpha_prod_t
520
- beta_prod_t_prev = 1 - alpha_prod_t_prev
521
-
522
- variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
523
-
524
- return variance
525
-
526
- # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
527
- def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
528
- """
529
- "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
530
- prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
531
- s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
532
- pixels from saturation at each step. We find that dynamic thresholding results in significantly better
533
- photorealism as well as better image-text alignment, especially when using very large guidance weights."
534
- https://arxiv.org/abs/2205.11487
535
- """
536
- dtype = sample.dtype
537
- batch_size, channels, height, width = sample.shape
538
-
539
- if dtype not in (torch.float32, torch.float64):
540
- sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
541
-
542
- # Flatten sample for doing quantile calculation along each image
543
- sample = sample.reshape(batch_size, channels * height * width)
544
-
545
- abs_sample = sample.abs() # "a certain percentile absolute pixel value"
546
-
547
- s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
548
- s = torch.clamp(
549
- s, min=1, max=self.config.sample_max_value
550
- ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
551
-
552
- s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
553
- sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
554
-
555
- sample = sample.reshape(batch_size, channels, height, width)
556
- sample = sample.to(dtype)
557
-
558
- return sample
559
-
560
def set_timesteps(self, num_inference_steps: int, lcm_origin_steps: int, device: Union[str, torch.device] = None):
    """
    Sets the discrete timesteps used for the diffusion chain (to be run before inference).
    Args:
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model.
        lcm_origin_steps (`int`):
            Length of the evenly spaced schedule over the training timesteps from which the inference
            timesteps are sub-sampled.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps are moved.
    Raises:
        ValueError: if `num_inference_steps` exceeds `config.num_train_timesteps`, if `lcm_origin_steps` is not
            in `[1, config.num_train_timesteps]`, or if `num_inference_steps` is not in `[1, lcm_origin_steps]`.
    """

    if num_inference_steps > self.config.num_train_timesteps:
        raise ValueError(
            f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
            f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
            f" maximal {self.config.num_train_timesteps} timesteps."
        )

    # A larger value makes the stride `c` below zero, which would yield timesteps of -1.
    if not 0 < lcm_origin_steps <= self.config.num_train_timesteps:
        raise ValueError(
            f"`lcm_origin_steps`: {lcm_origin_steps} must be between 1 and"
            f" `self.config.num_train_timesteps`: {self.config.num_train_timesteps}."
        )

    # More inference steps than origin steps would make the sub-sampling stride zero.
    if not 0 < num_inference_steps <= lcm_origin_steps:
        raise ValueError(
            f"`num_inference_steps`: {num_inference_steps} must be between 1 and"
            f" `lcm_origin_steps`: {lcm_origin_steps}."
        )

    self.num_inference_steps = num_inference_steps

    # LCM Timesteps Setting:  # Linear Spacing
    c = self.config.num_train_timesteps // lcm_origin_steps
    lcm_origin_timesteps = np.asarray(list(range(1, lcm_origin_steps + 1))) * c - 1  # LCM Training Steps Schedule
    skipping_step = len(lcm_origin_timesteps) // num_inference_steps
    # Walk the origin schedule from the end and keep the first `num_inference_steps` entries.
    timesteps = lcm_origin_timesteps[::-skipping_step][:num_inference_steps]  # LCM Inference Steps Schedule

    self.timesteps = torch.from_numpy(timesteps.copy()).to(device)
584
-
585
def get_scalings_for_boundary_condition_discrete(self, t):
    """Return the `(c_skip, c_out)` boundary-condition scalings for timestep `t`.

    Also (re)sets `self.sigma_data` to 0.5 on every call.
    """
    self.sigma_data = 0.5  # Default: 0.5

    # Dividing t by 0.1 makes c_skip almost a delta function at t=0.
    scaled_t = t / 0.1
    sigma_sq = self.sigma_data**2
    denominator = scaled_t**2 + sigma_sq

    c_skip = sigma_sq / denominator
    c_out = scaled_t / denominator**0.5
    return c_skip, c_out
592
-
593
def step(
    self,
    model_output: torch.FloatTensor,
    timeindex: int,
    timestep: int,
    sample: torch.FloatTensor,
    eta: float = 0.0,
    use_clipped_model_output: bool = False,
    generator=None,
    variance_noise: Optional[torch.FloatTensor] = None,
    return_dict: bool = True,
) -> Union["LCMSchedulerOutput", Tuple]:
    """
    Compute the denoised sample for the current timestep and the (re-noised) sample for the next one.
    Args:
        model_output (`torch.FloatTensor`):
            The direct output from learned diffusion model.
        timeindex (`int`):
            Position of `timestep` within `self.timesteps`; the next entry, if any, is the step to move to.
        timestep (`float`):
            The current discrete timestep in the diffusion chain.
        sample (`torch.FloatTensor`):
            A current instance of a sample created by the diffusion process.
        eta, use_clipped_model_output, generator, variance_noise:
            Accepted for interface compatibility; not read by this implementation.
        return_dict (`bool`, *optional*, defaults to `True`):
            Whether or not to return a [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] or `tuple`.
    Returns:
        [`~schedulers.scheduling_utils.LCMSchedulerOutput`] or `tuple`:
            If return_dict is `True`, [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] is returned, otherwise a
            tuple `(prev_sample, denoised)` is returned.
    Raises:
        ValueError: if `set_timesteps` has not been called, or if `config.prediction_type` is not one of
            `epsilon`, `sample` or `v_prediction`.
    """
    if self.num_inference_steps is None:
        raise ValueError(
            "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
        )

    # 1. get previous step value; on the last index there is no next entry, so reuse the current timestep
    prev_timeindex = timeindex + 1
    if prev_timeindex < len(self.timesteps):
        prev_timestep = self.timesteps[prev_timeindex]
    else:
        prev_timestep = timestep

    # 2. compute alphas, betas
    alpha_prod_t = self.alphas_cumprod[timestep]
    alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod

    beta_prod_t = 1 - alpha_prod_t
    beta_prod_t_prev = 1 - alpha_prod_t_prev

    # 3. Get scalings for boundary conditions
    c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep)

    # 4. Different Parameterization:
    parameterization = self.config.prediction_type

    if parameterization == "epsilon":  # noise-prediction
        pred_x0 = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt()
    elif parameterization == "sample":  # x-prediction
        pred_x0 = model_output
    elif parameterization == "v_prediction":  # v-prediction
        pred_x0 = alpha_prod_t.sqrt() * sample - beta_prod_t.sqrt() * model_output
    else:
        # Fail with a clear message instead of an unbound `pred_x0` further down.
        raise ValueError(
            f"prediction_type given as {parameterization} must be one of `epsilon`, `sample` or `v_prediction`"
        )

    # 5. Denoise model output using boundary conditions
    denoised = c_out * pred_x0 + c_skip * sample

    # 6. Sample z ~ N(0, I), For MultiStep Inference
    # Noise is not used for one-step sampling.
    if len(self.timesteps) > 1:
        noise = torch.randn(model_output.shape).to(model_output.device)
        prev_sample = alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise
    else:
        prev_sample = denoised

    if not return_dict:
        return (prev_sample, denoised)

    return LCMSchedulerOutput(prev_sample=prev_sample, denoised=denoised)
683
-
684
- # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
685
def add_noise(
    self,
    original_samples: torch.FloatTensor,
    noise: torch.FloatTensor,
    timesteps: torch.IntTensor,
) -> torch.FloatTensor:
    """Mix `noise` into `original_samples` according to the cumulative alphas.

    For each entry of `timesteps` the value `a = self.alphas_cumprod[t]` is
    looked up and the result is `sqrt(a) * original_samples + sqrt(1 - a) * noise`.

    Args:
        original_samples: Tensor to be noised; its device and dtype are used
            for the schedule lookup.
        noise: Tensor combined with `original_samples`.
        timesteps: Index tensor into `self.alphas_cumprod`.

    Returns:
        The noised samples.
    """
    target = original_samples

    # Look the schedule up on the same device/dtype as the samples.
    schedule = self.alphas_cumprod.to(device=target.device, dtype=target.dtype)
    selected = schedule[timesteps.to(target.device)]

    def _broadcastable(coeff):
        # Flatten to 1-D, then append singleton axes until the coefficient
        # has at least as many dimensions as the samples.
        coeff = coeff.flatten()
        extra_axes = max(len(target.shape) - len(coeff.shape), 0)
        return coeff.reshape(tuple(coeff.shape) + (1,) * extra_axes)

    signal_scale = _broadcastable(selected ** 0.5)
    noise_scale = _broadcastable((1 - selected) ** 0.5)
    return signal_scale * target + noise_scale * noise
707
-
708
- # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
709
def get_velocity(
    self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
) -> torch.FloatTensor:
    """Compute `sqrt(a) * noise - sqrt(1 - a) * sample` with `a = alphas_cumprod[t]`.

    Args:
        sample: Tensor whose device and dtype are used for the schedule lookup.
        noise: Tensor combined with `sample`.
        timesteps: Index tensor into `self.alphas_cumprod`.

    Returns:
        The velocity tensor.
    """
    rank = len(sample.shape)

    def _pad_to_rank(coeff):
        # Flatten, then add trailing singleton axes until the coefficient
        # has as many dimensions as `sample` (no-op if it already does).
        coeff = coeff.flatten()
        for _ in range(rank - len(coeff.shape)):
            coeff = coeff.unsqueeze(-1)
        return coeff

    # Index the schedule on the same device/dtype as the sample.
    cumulative = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
    step_index = timesteps.to(sample.device)

    alpha_root = _pad_to_rank(cumulative[step_index] ** 0.5)
    one_minus_alpha_root = _pad_to_rank((1 - cumulative[step_index]) ** 0.5)

    return alpha_root * noise - one_minus_alpha_root * sample
728
-
729
def __len__(self):
    """Return `num_train_timesteps` from the scheduler config."""
    scheduler_config = self.config
    return scheduler_config.num_train_timesteps
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/webui/__pycache__/realtime_ui.cpython-311.pyc ADDED
Binary file (6.63 kB). View file
 
frontend/webui/__pycache__/text_to_image_ui.cpython-311.pyc ADDED
Binary file (6.34 kB). View file
 
frontend/webui/__pycache__/ui.cpython-311.pyc ADDED
Binary file (2.39 kB). View file
 
frontend/webui/realtime_ui.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from backend.lcm_text_to_image import LCMTextToImage
3
+ from backend.models.lcmdiffusion_setting import LCMLora, LCMDiffusionSetting
4
+ from constants import DEVICE, LCM_DEFAULT_MODEL_OPENVINO
5
+ from time import perf_counter
6
+ import numpy as np
7
+ from cv2 import imencode
8
+ import base64
9
+ from backend.device import get_device_name
10
+ from constants import APP_VERSION
11
+ from backend.device import is_openvino_device
12
+
13
# Module-level text-to-image backend instance; predict() calls init() and
# generate() on it for every request.
lcm_text_to_image = LCMTextToImage()
# LCM-LoRA configuration handed to lcm_text_to_image.init() in predict().
lcm_lora = LCMLora(
    base_model_id="Lykon/dreamshaper-7",
    lcm_lora_id="latent-consistency/lcm-lora-sdv1-5",
)
18
+
19
+
20
+ # https://github.com/gradio-app/gradio/issues/2635#issuecomment-1423531319
21
def encode_pil_to_base64_new(pil_image):
    """Encode an image as a base64 PNG data URI using OpenCV.

    The image is converted to a NumPy array and its colour channels are
    reordered from PIL's RGB(A) layout to the BGR(A) layout OpenCV expects
    before PNG encoding.

    Args:
        pil_image: Image object accepted by `np.asarray` (e.g. a PIL image).

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.

    Raises:
        ValueError: If OpenCV reports that PNG encoding failed.
    """
    pixels = np.asarray(pil_image)

    if pixels.ndim == 3:
        if pixels.shape[2] == 4:
            # RGBA -> BGRA: swap only the colour channels and keep alpha last.
            # A plain reversal would produce ABGR and scramble the colours.
            pixels = pixels[:, :, [2, 1, 0, 3]]
        else:
            # RGB -> BGR.
            pixels = pixels[:, :, ::-1]
    # 2-D (single-channel) arrays have no channel axis and are encoded as-is.

    encoded_ok, png_bytes = imencode(".png", pixels)
    if not encoded_ok:
        raise ValueError("Failed to encode image as PNG")

    payload = base64.b64encode(png_bytes).decode("utf-8")
    return "data:image/png;base64," + payload
27
+
28
+
29
# Monkey-patch Gradio's PIL-to-base64 encoder with the OpenCV-based
# encode_pil_to_base64_new defined in this module.
gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
31
+
32
+
33
def predict(
    prompt,
    steps,
    seed,
):
    """Generate one image for `prompt` and return it.

    Initialises the shared `lcm_text_to_image` backend, builds an
    `LCMDiffusionSetting` from the arguments, runs generation and prints the
    elapsed time.

    Args:
        prompt: Text prompt stored in the diffusion setting.
        steps: Value stored as `inference_steps`.
        seed: Value stored as `seed` (with `use_seed` enabled).

    Returns:
        The first element of the result returned by `generate`.
    """
    # Query the device once so that init() and the settings below always
    # agree, instead of re-evaluating the check for every field.
    openvino_enabled = bool(is_openvino_device())

    lcm_text_to_image.init(
        model_id=LCM_DEFAULT_MODEL_OPENVINO,
        use_lora=True,
        lcm_lora=lcm_lora,
        use_openvino=openvino_enabled,
    )

    # Square output: 256 px on OpenVINO devices, 512 px otherwise.
    side_length = 256 if openvino_enabled else 512

    lcm_diffusion_setting = LCMDiffusionSetting()
    lcm_diffusion_setting.prompt = prompt
    lcm_diffusion_setting.guidance_scale = 1.0
    lcm_diffusion_setting.inference_steps = steps
    lcm_diffusion_setting.seed = seed
    lcm_diffusion_setting.use_seed = True
    lcm_diffusion_setting.image_width = side_length
    lcm_diffusion_setting.image_height = side_length
    lcm_diffusion_setting.use_openvino = openvino_enabled

    start = perf_counter()
    images = lcm_text_to_image.generate(lcm_diffusion_setting)
    latency = perf_counter() - start
    print(f"Latency: {latency:.2f} seconds")
    return images[0]
59
+
60
+
61
# Custom stylesheet passed to gr.Blocks(css=css); the selectors match the
# elem_id values used in the UI definition and hide the default footer.
css = """
#container{
    margin: 0 auto;
    max-width: 40rem;
}
#intro{
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
#generate_button {
    color: white;
    border-color: #007bff;
    background: #007bff;
    width: 200px;
    height: 50px;
}
footer {
    visibility: hidden
}
"""
82
+
83
+
84
def _get_footer_message() -> str:
    """Build the centered HTML footer containing APP_VERSION and the author link."""
    fragments = (
        f"<center><p> {APP_VERSION} ",
        ' © 2023 <a href="https://github.com/rupeshs">',
        " Rupesh Sreeraman</a></p></center>",
    )
    return "".join(fragments)
91
+
92
+
93
# Realtime text-to-image UI: a prompt box with a Generate button, the output
# image, and an "Advanced options" accordion with step/seed sliders.
# NOTE(review): confirm the Row/Column nesting below matches the intended layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="container"):
        # Title suffix shown only when running on an OpenVINO device.
        use_openvino = "- OpenVINO" if is_openvino_device() else ""
        gr.Markdown(
            f"""# Realtime FastSD CPU {use_openvino}
            **Device : {DEVICE} , {get_device_name()}**
            """,
            elem_id="intro",
        )

        with gr.Row():
            with gr.Row():
                prompt = gr.Textbox(
                    placeholder="Describe the image you'd like to see",
                    scale=5,
                    container=False,
                )
                generate_btn = gr.Button(
                    "Generate",
                    scale=1,
                    elem_id="generate_button",
                )

        image = gr.Image(type="filepath")
        with gr.Accordion("Advanced options", open=False):
            steps = gr.Slider(
                label="Steps",
                value=4 if is_openvino_device() else 3,
                minimum=1,
                maximum=6,
                step=1,
            )
            seed = gr.Slider(
                randomize=True,
                minimum=0,
                maximum=999999999,
                label="Seed",
                step=1,
            )
        gr.HTML(_get_footer_message())

        # Every trigger below calls predict() with the same inputs and writes
        # the result to the image component.
        inputs = [prompt, steps, seed]
        prompt.input(fn=predict, inputs=inputs, outputs=image, show_progress=False)
        generate_btn.click(
            fn=predict, inputs=inputs, outputs=image, show_progress=False
        )
        steps.change(fn=predict, inputs=inputs, outputs=image, show_progress=False)
        seed.change(fn=predict, inputs=inputs, outputs=image, show_progress=False)
142
+
143
def start_realtime_text_to_image(share=False):
    """Enable queuing on the realtime demo and launch it.

    Args:
        share: Forwarded to `demo.launch(share=...)`.
    """
    # Blocks.queue() returns the Blocks instance, so the calls can be chained.
    demo.queue().launch(share=share)
requirements.txt CHANGED
@@ -1,14 +1,16 @@
1
  accelerate==0.23.0
2
- diffusers==0.21.4
3
- transformers==4.34.0
4
  PyQt5
5
  Pillow==9.4.0
6
- openvino==2023.1.0
7
- optimum-intel==1.11.0
8
- onnx==1.14.1
 
9
  onnxruntime==1.16.1
10
  pydantic==2.4.2
11
  typing-extensions==4.8.0
12
- pyyaml
13
  gradio==3.39.0
14
- super-image==0.1.7
 
 
1
  accelerate==0.23.0
2
+ diffusers==0.23.0
3
+ transformers==4.35.0
4
  PyQt5
5
  Pillow==9.4.0
6
+ openvino==2023.2.0
7
+ optimum==1.14.0
8
+ optimum-intel==1.12.1
9
+ onnx==1.15.0
10
  onnxruntime==1.16.1
11
  pydantic==2.4.2
12
  typing-extensions==4.8.0
13
+ pyyaml==6.0.1
14
  gradio==3.39.0
15
+ peft==0.6.1
16
+ opencv-python==4.8.1.78