venkyyuvy committed
Commit 56f25c1
1 Parent(s): c76d87f

device fix

Files changed (2):
  1. image_generator.py +4 -10
  2. utils.py +12 -1
image_generator.py CHANGED
@@ -7,8 +7,6 @@ from transformers import CLIPTextModel, CLIPTokenizer, logging
 
  from utils import load_embedding_bin, set_timesteps, latents_to_pil
  from loss import blue_loss, cosine_loss
- from matplotlib import pyplot as plt
- from pathlib import Path
 
  torch.manual_seed(11)
  logging.set_verbosity_error()
@@ -43,10 +41,9 @@ vae = AutoencoderKL.from_pretrained(
  #
  # # Load the tokenizer and text encoder to tokenize and encode the text.
  tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
- text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14").to(
-     torch_device
- )
- #
+ text_encoder = CLIPTextModel.from_pretrained(
+     "openai/clip-vit-large-patch14").to(torch_device)
+
  # # The UNet model for generating the latents.
  unet = UNet2DConditionModel.from_pretrained(
      "CompVis/stable-diffusion-v1-4", subfolder="unet"
@@ -60,9 +57,6 @@ scheduler = LMSDiscreteScheduler(
      num_train_timesteps=1000,
  )
 
- # vae = vae
- # text_encoder = text_encoder.to(torch_device)
- unet = unet
  token_emb_layer = text_encoder.text_model.embeddings.token_embedding
  pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
  position_ids = text_encoder.text_model.embeddings.position_ids[:, :77]
@@ -227,7 +221,7 @@ def generate_image_from_embeddings(
          )
 
          #### ADDITIONAL GUIDANCE ###
-         if i % 2 == 0:
+         if i % 5 == 0:
              # Requires grad on the latents
              latents = latents.detach().requires_grad_()
 
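The gate changed from i % 2 == 0 to i % 5 == 0, so the extra loss-based guidance (the blue_loss / cosine_loss imported above) now runs on every 5th denoising step instead of every other one, reducing the number of gradient passes. For context, a minimal sketch of what such a guided step usually looks like; apart from blue_loss, vae, and the latents.detach().requires_grad_() line visible in the diff, every name and constant below is an assumption, not this repo's actual code:

# Sketch only, not the committed implementation. The sigma-based x0 estimate
# and the scale factor are assumptions; vae and loss_fn (e.g. blue_loss) are
# the objects loaded above.
import torch

def apply_extra_guidance(latents, noise_pred, sigma, vae, loss_fn, scale=100):
    # Track gradients on the current latents.
    latents = latents.detach().requires_grad_()
    # Rough estimate of the fully denoised latents at this step.
    latents_x0 = latents - sigma * noise_pred
    # Decode to pixel space so the loss can inspect the image.
    images = vae.decode(latents_x0 / 0.18215).sample / 2 + 0.5
    loss = loss_fn(images) * scale
    # Nudge the latents against the loss gradient.
    grad = torch.autograd.grad(loss, latents)[0]
    return latents.detach() - grad * sigma ** 2

Inside the generation loop this would only be invoked when i % 5 == 0; on the other steps the latents pass through unchanged.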
utils.py CHANGED
@@ -1,8 +1,19 @@
+ import os
  import torch
  from PIL import Image
  from diffusers import AutoencoderKL
 
- vae = AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae").to("mps:0")
+ torch_device = (
+     "cuda"
+     if torch.cuda.is_available()
+     else "mps"
+     if torch.backends.mps.is_available()
+     else "cpu"
+ )
+ if "mps" == torch_device:
+     os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+
+ vae = AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae").to(torch_device)
 
  def pil_to_latent(input_im):
      # Single image -> single latent in a batch (so size 1, 4, 64, 64)
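Net effect of the utils.py change: the device is resolved once at import time (CUDA first, then MPS, then CPU) instead of being hard-coded to mps:0, and PYTORCH_ENABLE_MPS_FALLBACK=1 lets operators without an MPS implementation fall back to CPU on Apple silicon. A quick sanity check, as a sketch; torch_device and vae are module-level names in utils.py per this diff, but importing them like this elsewhere is an assumption, not something the commit shows:

# Illustrative only; not part of the commit.
from utils import vae, torch_device

print(torch_device)                   # "cuda", "mps", or "cpu"
print(next(vae.parameters()).device)  # should report the same backend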