Commit 45b110b
Parent(s): a240ddb

Adding code for stable diffusion using text inversion

Files changed:
- README.md +55 -1
- app.py +219 -0
- requirements.txt +7 -0
README.md
CHANGED
@@ -10,4 +10,58 @@ pinned: false
short_description: Stable Diffusion using Text Inversion
---

# Stable Diffusion using Text Inversion

A Gradio web application that generates images using Stable Diffusion with various text inversion concepts and loss functions.

## Features

- Generate images using Stable Diffusion v1.4
- Apply different artistic styles using text inversion concepts:
  - Dreams
  - Midjourney Style
  - Moebius
  - Marc Allante
  - WLOP
- Automatic application of multiple loss functions (one is sketched after this list):
  - No Loss (base generation)
  - Blue Channel Loss
  - Elastic Transformation Loss
  - Symmetry Loss
  - Saturation Loss
- User-friendly interface with preset prompts and custom prompt input
- Side-by-side comparison of the different loss function effects
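
As an illustration, the blue channel loss is the simplest of the four: it measures how far the blue channel of an image sits from a target value of 0.9, so a lower loss means a bluer image. A minimal sketch, mirroring the corresponding branch of `image_loss` in `app.py`:

```python
import torch

def blue_loss(images: torch.Tensor) -> torch.Tensor:
    # images: batch of shape (N, 3, H, W) with values in [0, 1];
    # channel index 2 is the blue channel.
    return torch.abs(images[:, 2] - 0.9).mean()
```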

## Usage

1. Select a preset prompt or enter your own custom prompt
2. Choose a style concept from the dropdown menu
3. Click "Submit" to generate images
4. View the results showing the different loss function effects side by side
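
For example, choosing the preset prompt "A realistic image of Rabbit in a spacesuit in the style of" together with the concept `midjourney-style` yields the final prompt "A realistic image of Rabbit in a spacesuit in the style of midjourney-style", since the app simply appends the selected concept to the prompt text.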

## Installation

1. Clone this repository
2. Install the required dependencies:
```bash
pip install -r requirements.txt
```
3. Run the application:
```bash
python app.py
```
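
Note: the first launch downloads the Stable Diffusion v1.4 weights and the concept embeddings from the Hugging Face Hub, so expect several gigabytes of traffic and a long initial startup.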

## Requirements

- Python 3.7+
- PyTorch
- Diffusers
- Transformers
- Gradio
- Torchvision
- Pillow (PIL)
- SciPy (used by the LMS scheduler)

## Model Details

The application uses the CompVis/stable-diffusion-v1-4 model with text inversion concepts from the Hugging Face SD concepts library. The model runs in float16 precision on CUDA and in float32 otherwise, and supports CUDA, MPS, and CPU devices.
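
The loading logic boils down to the following sketch, condensed from `app.py` (the concept repositories and the `mean_resizing` flag are taken from there):

```python
import torch
from diffusers import DiffusionPipeline

# Pick the best available device; float16 is used only on CUDA
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

pipe = DiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=dtype
).to(device)

# Each concept repo contributes a learned embedding and its placeholder token
for repo in ["sd-concepts-library/dreams", "sd-concepts-library/midjourney-style"]:
    pipe.load_textual_inversion(repo, mean_resizing=False)
```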

## License

This project uses the CompVis/stable-diffusion-v1-4 model, which is subject to the CreativeML Open RAIL-M license.
app.py
ADDED
@@ -0,0 +1,219 @@
import gradio as gr
import torch
import gc
from PIL import Image
import torchvision.transforms as T
import torch.nn.functional as F
from diffusers import DiffusionPipeline, LMSDiscreteScheduler

# Model state lives in module-level globals and is initialized lazily,
# so the heavy pipeline is only loaded once per process.
pipe = None
device = None
elastic_transformer = None

def init_model():
    global pipe, device
    if pipe is not None:
        return pipe, device

    torch_device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
    torch_dtype = torch.float16 if torch_device == "cuda" else torch.float32

    pipe = DiffusionPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4",
        torch_dtype=torch_dtype
    ).to(torch_device)

    # Load SD concepts
    concepts = {
        "dreams": "sd-concepts-library/dreams",
        "midjourney-style": "sd-concepts-library/midjourney-style",
        "moebius": "sd-concepts-library/moebius",
        "marc-allante": "sd-concepts-library/style-of-marc-allante",
        "wlop": "sd-concepts-library/wlop-style"
    }

    for concept in concepts.values():
        pipe.load_textual_inversion(concept, mean_resizing=False)

    device = torch_device
    return pipe, device

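# Note (assumption, not verified for every repo): each sd-concepts-library
# repository defines its own placeholder token, typically of the form
# "<concept-name>"; after load_textual_inversion, using that token in a
# prompt triggers the learned style.
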
def init_transformers(device):
    global elastic_transformer
    if elastic_transformer is not None:
        return elastic_transformer
    elastic_transformer = T.ElasticTransform(alpha=550.0, sigma=5.0).to(device)
    return elastic_transformer

def image_loss(images, loss_type, device, elastic_transformer):
    """Score a batch of images (N, 3, H, W) in [0, 1] with the selected loss."""
    if loss_type == 'blue':
        # Distance of the blue channel from 0.9
        error = torch.abs(images[:, 2] - 0.9).mean()
        return error.to(device)
    elif loss_type == 'elastic':
        # Difference between the image and an elastically deformed copy
        transformed_imgs = elastic_transformer(images)
        error = torch.abs(transformed_imgs - images).mean()
        return error.to(device)
    elif loss_type == 'symmetry':
        # MSE between the image and its horizontal mirror
        flipped_image = torch.flip(images, [3])
        error = F.mse_loss(images, flipped_image)
        return error.to(device)
    elif loss_type == 'saturation':
        # Difference between the image and a heavily saturated copy
        transformed_imgs = T.functional.adjust_saturation(images, saturation_factor=10)
        error = torch.abs(transformed_imgs - images).mean()
        return error.to(device)
    else:
        return torch.tensor(0.0).to(device)

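# Illustrative check (hypothetical, not executed by the app): every branch maps
# a batch in [0, 1] to a non-negative scalar tensor, e.g.
#   x = torch.rand(1, 3, 64, 64)
#   image_loss(x, 'blue', 'cpu', None).ndim == 0  # True; 'blue' ignores the transformer
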
def generate_images(prompt, concept, progress=gr.Progress()):
    # gr.Progress() as a default argument lets Gradio inject a progress tracker
    global pipe, device, elastic_transformer
    if pipe is None:
        pipe, device = init_model()
    if elastic_transformer is None:
        elastic_transformer = init_transformers(device)

    # Configuration
    height, width = 384, 384
    guidance_scale = 8
    num_inference_steps = 45
    loss_scale = 10.0

    # Create scheduler
    scheduler = LMSDiscreteScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        num_train_timesteps=1000
    )
    pipe.scheduler = scheduler

    # Create the prompt text by appending the selected concept
    prompt_text = f"{prompt} {concept}"

    # Predefined seeds for each loss function
    seeds = {
        'none': 42,
        'blue': 123,
        'elastic': 456,
        'symmetry': 789,
        'saturation': 1000
    }

    loss_functions = ['none', 'blue', 'elastic', 'symmetry', 'saturation']
    images = []

    # Generate one image per loss function
    for idx, loss_type in enumerate(loss_functions):
        progress(idx / len(loss_functions), f"Generating {loss_type} image...")
        generator = torch.manual_seed(seeds[loss_type])

        # Generate base image
        try:
            output = pipe(
                prompt_text,
                height=height,
                width=width,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator
            )
        except Exception as e:
            print(f"Error generating image: {e}")
            return None

        # Apply the loss function if not 'none'
        if loss_type != 'none':
            try:
                # Convert the PIL image to a (1, 3, H, W) tensor on the device
                image_tensor = T.ToTensor()(output.images[0]).unsqueeze(0).to(device)
                # Subtract the scalar loss uniformly from every pixel; this is a
                # global shift scaled by loss_scale, not gradient-based guidance
                loss = image_loss(image_tensor, loss_type, device, elastic_transformer)
                image_tensor = image_tensor - loss_scale * loss
                # Move back to CPU, clamp to [0, 1], and convert to PIL
                image = T.ToPILImage()(image_tensor.cpu().squeeze(0).clamp(0, 1))
            except Exception as e:
                print(f"Error applying {loss_type} loss: {e}")
                image = output.images[0]  # Use the original image if the loss fails
        else:
            image = output.images[0]

        # Add the image with its label
        try:
            # Ensure the image is a PIL.Image
            if not isinstance(image, Image.Image):
                print(f"Warning: Converting {loss_type} image to PIL format")
                image = Image.fromarray(image)

            # gr.Gallery expects a list of (image, caption) tuples
            images.append((image, f"{loss_type.capitalize()} Loss"))
            print(f"Added {loss_type} image to gallery")  # Debug print
        except Exception as e:
            print(f"Error adding {loss_type} image to gallery: {e}")
            continue

        # Clear GPU memory after each image
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()

    # Return all generated images
    print(f"Returning {len(images)} images")
    if not images:
        return None
    return images

def create_interface():
    default_prompts = [
        "A realistic image of Boy with a cowboy hat in the style of",
        "A realistic image of Rabbit in a spacesuit in the style of",
        "A rugged soldier in full combat gear, standing on a battlefield at dusk, dramatic lighting, highly detailed, cinematic style in the style of"
    ]

    concepts = [
        "dreams",
        "midjourney-style",
        "moebius",
        "marc-allante",
        "wlop"
    ]

    interface = gr.Interface(
        fn=generate_images,
        inputs=[
            gr.Dropdown(choices=default_prompts, label="Select a preset prompt or type your own", allow_custom_value=True),
            gr.Dropdown(choices=concepts, label="Select SD Concept")
        ],
        outputs=gr.Gallery(
            label="Generated Images (From Left to Right: Original, Blue Loss, Elastic Loss, Symmetry Loss, Saturation Loss)",
            show_label=True,
            elem_id="gallery",
            columns=5,
            rows=1,
            height=512,
            object_fit="contain"
        ),
        title="Stable Diffusion using Text Inversion",
        description="""Generate images using Stable Diffusion with different style concepts. The output shows 5 images side by side:
1. Original Image (No Loss)
2. Blue Channel Loss - Enhances blue tones
3. Elastic Loss - Adds elastic deformation
4. Symmetry Loss - Enforces symmetrical features
5. Saturation Loss - Modifies color saturation

Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
        flagging_mode="never"  # "never" replaces the deprecated allow_flagging argument
    )

    return interface

if __name__ == "__main__":
    interface = create_interface()
    interface.queue(max_size=5)
    interface.launch(
        share=True,
        server_name="0.0.0.0",
        max_threads=1
    )
requirements.txt
ADDED
@@ -0,0 +1,7 @@
torch
diffusers
transformers
gradio
torchvision
Pillow
scipy  # used by diffusers' LMSDiscreteScheduler