Commit
·
3df6611
1
Parent(s):
6304c5b
Adding changes to speed up the diffusion process
Browse files - app.py +38 -15
- requirements.txt +2 -1
app.py
CHANGED
@@ -67,11 +67,11 @@ def image_loss(images, loss_type, device, elastic_transformer):
|
|
67 |
else:
|
68 |
return torch.tensor(0.0).to(device)
|
69 |
|
70 |
-
# Update configuration
|
71 |
-
height, width =
|
72 |
-
guidance_scale =
|
73 |
-
num_inference_steps =
|
74 |
-
loss_scale =
|
75 |
|
76 |
def generate_images(prompt, concept):
|
77 |
global pipe, device, elastic_transformer
|
@@ -89,9 +89,10 @@ def generate_images(prompt, concept):
|
|
89 |
progress = gr.Progress()
|
90 |
|
91 |
for idx, loss_type in enumerate(loss_functions):
|
92 |
-
progress(idx/len(loss_functions), f"Generating {loss_type} image...")
|
93 |
-
|
94 |
try:
|
|
|
|
|
|
|
95 |
# Better memory management
|
96 |
if torch.cuda.is_available():
|
97 |
torch.cuda.empty_cache()
|
@@ -180,8 +181,30 @@ def generate_images(prompt, concept):
|
|
180 |
|
181 |
latents = latents.detach() - cond_grad * sigma**2
|
182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
latents = scheduler.step(noise_pred, t, latents).prev_sample
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
# Proper latent to image conversion
|
186 |
latents = (1 / 0.18215) * latents
|
187 |
with torch.no_grad():
|
@@ -230,7 +253,7 @@ def create_interface():
|
|
230 |
gr.Dropdown(choices=concepts, label="Select SD Concept")
|
231 |
],
|
232 |
outputs=gr.Gallery(
|
233 |
-
label="Generated Images",
|
234 |
show_label=True,
|
235 |
elem_id="gallery",
|
236 |
columns=5,
|
@@ -238,12 +261,12 @@ def create_interface():
|
|
238 |
height="auto"
|
239 |
),
|
240 |
title="Stable Diffusion using Text Inversion",
|
241 |
-
description="""Generate images using Stable Diffusion with different style concepts. The
|
242 |
-
1. Original Image (No Loss)
|
243 |
-
2. Blue Channel Loss -
|
244 |
-
3. Elastic Loss -
|
245 |
-
4. Symmetry Loss -
|
246 |
-
5. Saturation Loss -
|
247 |
|
248 |
Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
|
249 |
cache_examples=False,
|
|
|
67 |
else:
|
68 |
return torch.tensor(0.0).to(device)
|
69 |
|
70 |
+
# Update configuration for faster generation
|
71 |
+
height, width = 384, 384 # Reduced from 512x512 to 384x384
|
72 |
+
guidance_scale = 7.5
|
73 |
+
num_inference_steps = 30
|
74 |
+
loss_scale = 150
|
75 |
|
76 |
def generate_images(prompt, concept):
|
77 |
global pipe, device, elastic_transformer
|
|
|
89 |
progress = gr.Progress()
|
90 |
|
91 |
for idx, loss_type in enumerate(loss_functions):
|
|
|
|
|
92 |
try:
|
93 |
+
# Add detailed progress reporting
|
94 |
+
progress(idx/len(loss_functions), f"Starting {loss_type} image generation...")
|
95 |
+
|
96 |
# Better memory management
|
97 |
if torch.cuda.is_available():
|
98 |
torch.cuda.empty_cache()
|
|
|
181 |
|
182 |
latents = latents.detach() - cond_grad * sigma**2
|
183 |
|
184 |
+
# Diffusion process with progress updates
|
185 |
+
for i, t in enumerate(scheduler.timesteps):
|
186 |
+
current_progress = (idx + (i / len(scheduler.timesteps))) / len(loss_functions)
|
187 |
+
progress(current_progress, f"Generating {loss_type} image: Step {i+1}/{len(scheduler.timesteps)}")
|
188 |
+
|
189 |
+
# Apply loss less frequently for speed
|
190 |
+
if loss_type != 'none' and i % 8 == 0: # Changed from 5 to 8
|
191 |
+
with torch.set_grad_enabled(True):
|
192 |
+
# Enable gradients for images
|
193 |
+
denoised_images = pipe.vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5
|
194 |
+
denoised_images = denoised_images.requires_grad_() # Enable gradients for images
|
195 |
+
loss = image_loss(denoised_images, loss_type, device, elastic_transformer)
|
196 |
+
cond_grad = torch.autograd.grad(loss * loss_scale, latents)[0]
|
197 |
+
|
198 |
+
latents = latents.detach() - cond_grad * sigma**2
|
199 |
+
|
200 |
latents = scheduler.step(noise_pred, t, latents).prev_sample
|
201 |
+
|
202 |
+
# Clear CUDA cache more efficiently
|
203 |
+
if torch.cuda.is_available() and i % 10 == 0:
|
204 |
+
torch.cuda.empty_cache()
|
205 |
+
|
206 |
+
progress(idx/len(loss_functions), f"Finalizing {loss_type} image...")
|
207 |
+
|
208 |
# Proper latent to image conversion
|
209 |
latents = (1 / 0.18215) * latents
|
210 |
with torch.no_grad():
|
|
|
253 |
gr.Dropdown(choices=concepts, label="Select SD Concept")
|
254 |
],
|
255 |
outputs=gr.Gallery(
|
256 |
+
label="Generated Images (From Left to Right: Original, Blue Channel, Elastic, Symmetry, Saturation)",
|
257 |
show_label=True,
|
258 |
elem_id="gallery",
|
259 |
columns=5,
|
|
|
261 |
height="auto"
|
262 |
),
|
263 |
title="Stable Diffusion using Text Inversion",
|
264 |
+
description="""Generate images using Stable Diffusion with different style concepts. The gallery shows 5 images in this order:
|
265 |
+
1. Left-most: Original Image (No Loss) - Base generation without modifications
|
266 |
+
2. Second: Blue Channel Loss - Enhanced blue tones for atmospheric effects
|
267 |
+
3. Middle: Elastic Loss - Added elastic deformation for artistic distortion
|
268 |
+
4. Fourth: Symmetry Loss - Enforced symmetrical features
|
269 |
+
5. Right-most: Saturation Loss - Modified color saturation for vibrant effects
|
270 |
|
271 |
Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
|
272 |
cache_examples=False,
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ transformers
|
|
4 |
gradio
|
5 |
torchvision
|
6 |
Pillow
|
7 |
-
scipy
|
|
|
|
4 |
gradio
|
5 |
torchvision
|
6 |
Pillow
|
7 |
+
scipy
|
8 |
+
accelerate
|