AYYasaswini committed
Commit 81778ab · verified · 1 Parent(s): 4e58e1a

Update app.py

Files changed (1):
  1. app.py +1 -51
app.py CHANGED
@@ -48,33 +48,12 @@ If all you want is to make a picture with some text, you could ignore this noteb
 What we want to do in this notebook is dig a little deeper into how this works, so we'll start by checking that the example code runs. Again, this is adapted from the [HF notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb) and looks very similar to what you'll find if you inspect [the `__call__()` method of the stable diffusion pipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L200).
 """

-# Some settings
-prompt = ["A watercolor painting of an otter"]
-height = 512                        # default height of Stable Diffusion
-width = 512                         # default width of Stable Diffusion
-num_inference_steps = 30            # Number of denoising steps
-guidance_scale = 7.5                # Scale for classifier-free guidance
-generator = torch.manual_seed(32)   # Seed generator to create the inital latent noise
-batch_size = 1
-
-# Prep text
-text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
-with torch.no_grad():
-    text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[0]
-max_length = text_input.input_ids.shape[-1]
-uncond_input = tokenizer(
-    [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
-)
-with torch.no_grad():
-    uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[0]
-text_embeddings = torch.cat([uncond_embeddings, text_embeddings])

 # Prep Scheduler
 def set_timesteps(scheduler, num_inference_steps):
     scheduler.set_timesteps(num_inference_steps)
     scheduler.timesteps = scheduler.timesteps.to(torch.float32) # minor fix to ensure MPS compatibility, fixed in diffusers PR 3925

-set_timesteps(scheduler,num_inference_steps)

 # Prep latents
 latents = torch.randn(
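The lines removed above set up the prompt and built the paired unconditional/conditional text embeddings used for classifier-free guidance. For reference, here is that prep restructured into a self-contained sketch; the CLIP checkpoint name, the device handling, and the `encode_prompt` wrapper are illustrative assumptions (following the upstream deep-dive notebook), not code taken from app.py:

```python
import torch
from transformers import CLIPTextModel, CLIPTokenizer

# Assumed setup: the CLIP text encoder the upstream notebook uses (not shown in this diff)
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14").to(torch_device)

def encode_prompt(prompt, batch_size=1):
    # Conditional embeddings for the prompt
    text_input = tokenizer(prompt, padding="max_length",
                           max_length=tokenizer.model_max_length,
                           truncation=True, return_tensors="pt")
    with torch.no_grad():
        text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[0]
    # Unconditional (empty-prompt) embeddings for classifier-free guidance
    max_length = text_input.input_ids.shape[-1]
    uncond_input = tokenizer([""] * batch_size, padding="max_length",
                             max_length=max_length, return_tensors="pt")
    with torch.no_grad():
        uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[0]
    # Concatenate so a single UNet forward pass later covers both branches
    return torch.cat([uncond_embeddings, text_embeddings])

text_embeddings = encode_prompt(["A watercolor painting of an otter"])
```

Concatenating the unconditional and conditional embeddings is what lets the sampling loop below run the UNet once on a doubled latent batch instead of doing two forward passes.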
@@ -87,36 +66,6 @@ latents = latents * scheduler.init_noise_sigma # Scaling (previous versions did
 # Loop
 with autocast("cuda"): # will fallback to CPU if no CUDA; no autocast for MPS
     for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps)):
-        # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
-        latent_model_input = torch.cat([latents] * 2)
-        sigma = scheduler.sigmas[i]
-        # Scale the latents (preconditioning):
-        # latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5) # Diffusers 0.3 and below
-        latent_model_input = scheduler.scale_model_input(latent_model_input, t)
-
-        # predict the noise residual
-        with torch.no_grad():
-            noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample
-
-        # perform guidance
-        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
-
-        # compute the previous noisy sample x_t -> x_t-1
-        # latents = scheduler.step(noise_pred, i, latents)["prev_sample"] # Diffusers 0.3 and below
-        latents = scheduler.step(noise_pred, t, latents).prev_sample
-
-# scale and decode the image latents with vae
-latents = 1 / 0.18215 * latents
-with torch.no_grad():
-    image = vae.decode(latents).sample
-
-# Display
-image = (image / 2 + 0.5).clamp(0, 1)
-image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
-images = (image * 255).round().astype("uint8")
-pil_images = [Image.fromarray(image) for image in images]
-pil_images[0]

 """It's working, but that's quite a bit of code! Let's look at the components one by one.

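The removed loop runs the UNet once on a doubled latent batch, splits the prediction, and applies classifier-free guidance before each scheduler step. A toy sketch of just the guidance arithmetic, with random tensors standing in for real UNet outputs (the shapes and `guidance_scale` value follow the removed code; nothing here is loaded from app.py):

```python
import torch

guidance_scale = 7.5                    # same scale the removed code used
noise_pred = torch.randn(2, 4, 64, 64)  # stand-in for unet(...).sample on the doubled batch

# Split the doubled batch back into unconditional / text-conditioned predictions
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)

# Classifier-free guidance: push the prediction away from the unconditional branch
guided = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
print(guided.shape)  # torch.Size([1, 4, 64, 64])
```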
@@ -187,6 +136,7 @@ We use a text encoder model to turn our text into a set of 'embeddings' which ar
 # Our text prompt
 prompt = 'A picture of a puppy'

+
 """We begin with tokenization:"""

 # Turn the text into a sequnce of tokens:
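The second hunk above also dropped the VAE decode and display code that turned the final latents into a PIL image. For reference, that tail end reads naturally as a small helper; the `latents_to_pil` name is made up here, but the 1/0.18215 scaling and the tensor-to-uint8 conversion are copied from the removed lines, and `vae` is assumed to be the autoencoder loaded elsewhere in app.py:

```python
import torch
from PIL import Image

def latents_to_pil(latents, vae):
    # Undo the latent scaling Stable Diffusion applies before decoding
    latents = (1 / 0.18215) * latents
    with torch.no_grad():
        image = vae.decode(latents).sample
    # Map from [-1, 1] to [0, 255] uint8, NCHW -> NHWC, then convert to PIL
    image = (image / 2 + 0.5).clamp(0, 1)
    image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
    images = (image * 255).round().astype("uint8")
    return [Image.fromarray(img) for img in images]
```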
 