Commit
·
3df6611
1
Parent(s):
6304c5b
Adding changes to speed up the diffusion process
Browse files - app.py +38 -15
- requirements.txt +2 -1
app.py
CHANGED
@@ -67,11 +67,11 @@ def image_loss(images, loss_type, device, elastic_transformer):
|
|
67 |
else:
|
68 |
return torch.tensor(0.0).to(device)
|
69 |
|
70 |
-
# Update configuration
|
71 |
-
height, width =
|
72 |
-
guidance_scale =
|
73 |
-
num_inference_steps =
|
74 |
-
loss_scale =
|
75 |
|
76 |
def generate_images(prompt, concept):
|
77 |
global pipe, device, elastic_transformer
|
@@ -89,9 +89,10 @@ def generate_images(prompt, concept):
|
|
89 |
progress = gr.Progress()
|
90 |
|
91 |
for idx, loss_type in enumerate(loss_functions):
|
92 |
-
progress(idx/len(loss_functions), f"Generating {loss_type} image...")
|
93 |
-
|
94 |
try:
|
|
|
|
|
|
|
95 |
# Better memory management
|
96 |
if torch.cuda.is_available():
|
97 |
torch.cuda.empty_cache()
|
@@ -180,8 +181,30 @@ def generate_images(prompt, concept):
|
|
180 |
|
181 |
latents = latents.detach() - cond_grad * sigma**2
|
182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
latents = scheduler.step(noise_pred, t, latents).prev_sample
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
# Proper latent to image conversion
|
186 |
latents = (1 / 0.18215) * latents
|
187 |
with torch.no_grad():
|
@@ -230,7 +253,7 @@ def create_interface():
|
|
230 |
gr.Dropdown(choices=concepts, label="Select SD Concept")
|
231 |
],
|
232 |
outputs=gr.Gallery(
|
233 |
-
label="Generated Images",
|
234 |
show_label=True,
|
235 |
elem_id="gallery",
|
236 |
columns=5,
|
@@ -238,12 +261,12 @@ def create_interface():
|
|
238 |
height="auto"
|
239 |
),
|
240 |
title="Stable Diffusion using Text Inversion",
|
241 |
-
description="""Generate images using Stable Diffusion with different style concepts. The
|
242 |
-
1. Original Image (No Loss)
|
243 |
-
2. Blue Channel Loss -
|
244 |
-
3. Elastic Loss -
|
245 |
-
4. Symmetry Loss -
|
246 |
-
5. Saturation Loss -
|
247 |
|
248 |
Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
|
249 |
cache_examples=False,
|
|
|
67 |
else:
|
68 |
return torch.tensor(0.0).to(device)
|
69 |
|
70 |
+
# Update configuration for faster generation
|
71 |
+
height, width = 384, 384 # Reduced from 512x512 to 384x384
|
72 |
+
guidance_scale = 7.5
|
73 |
+
num_inference_steps = 30
|
74 |
+
loss_scale = 150
|
75 |
|
76 |
def generate_images(prompt, concept):
|
77 |
global pipe, device, elastic_transformer
|
|
|
89 |
progress = gr.Progress()
|
90 |
|
91 |
for idx, loss_type in enumerate(loss_functions):
|
|
|
|
|
92 |
try:
|
93 |
+
# Add detailed progress reporting
|
94 |
+
progress(idx/len(loss_functions), f"Starting {loss_type} image generation...")
|
95 |
+
|
96 |
# Better memory management
|
97 |
if torch.cuda.is_available():
|
98 |
torch.cuda.empty_cache()
|
|
|
181 |
|
182 |
latents = latents.detach() - cond_grad * sigma**2
|
183 |
|
184 |
+
# Diffusion process with progress updates
|
185 |
+
for i, t in enumerate(scheduler.timesteps):
|
186 |
+
current_progress = (idx + (i / len(scheduler.timesteps))) / len(loss_functions)
|
187 |
+
progress(current_progress, f"Generating {loss_type} image: Step {i+1}/{len(scheduler.timesteps)}")
|
188 |
+
|
189 |
+
# Apply loss less frequently for speed
|
190 |
+
if loss_type != 'none' and i % 8 == 0: # Changed from 5 to 8
|
191 |
+
with torch.set_grad_enabled(True):
|
192 |
+
# Enable gradients for images
|
193 |
+
denoised_images = pipe.vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5
|
194 |
+
denoised_images = denoised_images.requires_grad_() # Enable gradients for images
|
195 |
+
loss = image_loss(denoised_images, loss_type, device, elastic_transformer)
|
196 |
+
cond_grad = torch.autograd.grad(loss * loss_scale, latents)[0]
|
197 |
+
|
198 |
+
latents = latents.detach() - cond_grad * sigma**2
|
199 |
+
|
200 |
latents = scheduler.step(noise_pred, t, latents).prev_sample
|
201 |
+
|
202 |
+
# Clear CUDA cache more efficiently
|
203 |
+
if torch.cuda.is_available() and i % 10 == 0:
|
204 |
+
torch.cuda.empty_cache()
|
205 |
+
|
206 |
+
progress(idx/len(loss_functions), f"Finalizing {loss_type} image...")
|
207 |
+
|
208 |
# Proper latent to image conversion
|
209 |
latents = (1 / 0.18215) * latents
|
210 |
with torch.no_grad():
|
|
|
253 |
gr.Dropdown(choices=concepts, label="Select SD Concept")
|
254 |
],
|
255 |
outputs=gr.Gallery(
|
256 |
+
label="Generated Images (From Left to Right: Original, Blue Channel, Elastic, Symmetry, Saturation)",
|
257 |
show_label=True,
|
258 |
elem_id="gallery",
|
259 |
columns=5,
|
|
|
261 |
height="auto"
|
262 |
),
|
263 |
title="Stable Diffusion using Text Inversion",
|
264 |
+
description="""Generate images using Stable Diffusion with different style concepts. The gallery shows 5 images in this order:
|
265 |
+
1. Left-most: Original Image (No Loss) - Base generation without modifications
|
266 |
+
2. Second: Blue Channel Loss - Enhanced blue tones for atmospheric effects
|
267 |
+
3. Middle: Elastic Loss - Added elastic deformation for artistic distortion
|
268 |
+
4. Fourth: Symmetry Loss - Enforced symmetrical features
|
269 |
+
5. Right-most: Saturation Loss - Modified color saturation for vibrant effects
|
270 |
|
271 |
Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
|
272 |
cache_examples=False,
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ transformers
|
|
4 |
gradio
|
5 |
torchvision
|
6 |
Pillow
|
7 |
-
scipy
|
|
|
|
4 |
gradio
|
5 |
torchvision
|
6 |
Pillow
|
7 |
+
scipy
|
8 |
+
accelerate
|