Commit b7b4c25
Parent(s): d9cf71d
update, added more options
Files changed:
- app.py +211 -20
- disclaimer.md +30 -0
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/__pycache__/attention.cpython-312.pyc +0 -0
- src/__pycache__/clip.cpython-312.pyc +0 -0
- src/__pycache__/config.cpython-312.pyc +0 -0
- src/__pycache__/ddpm.cpython-312.pyc +0 -0
- src/__pycache__/decoder.cpython-312.pyc +0 -0
- src/__pycache__/diffusion.cpython-312.pyc +0 -0
- src/__pycache__/encoder.cpython-312.pyc +0 -0
- src/__pycache__/model_converter.cpython-312.pyc +0 -3
- src/__pycache__/model_loader.cpython-312.pyc +0 -0
- src/__pycache__/pipeline.cpython-312.pyc +0 -0
- src/pipeline.py +99 -40
app.py
CHANGED
@@ -52,7 +52,7 @@ config.models = model_loader.load_models(str(model_file), device)
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
-def infer(
+def txt2img(
     prompt,
     negative_prompt,
     seed,
@@ -77,6 +77,7 @@ def infer(
     output_image = pipeline.generate(
         prompt=prompt,
         uncond_prompt=negative_prompt,
+        input_image=None,
         config=config
     )
 
@@ -85,6 +86,103 @@ def infer(
 
     return image, seed
 
+def img2img(
+    prompt,
+    negative_prompt,
+    seed,
+    randomize_seed,
+    width,
+    height,
+    guidance_scale,
+    num_inference_steps,
+    input_image,
+    strength,
+    progress=gr.Progress(track_tqdm=True),
+):
+    try:
+        if randomize_seed:
+            seed = random.randint(0, MAX_SEED)
+
+        if input_image is None:
+            return None, seed
+
+        # Update config with user settings
+        config.seed = seed
+        config.diffusion.cfg_scale = guidance_scale
+        config.diffusion.n_inference_steps = num_inference_steps
+        config.model.width = width
+        config.model.height = height
+        config.diffusion.strength = strength
+
+        # Generate image
+        output_image = pipeline.generate(
+            prompt=prompt,
+            uncond_prompt=negative_prompt,
+            input_image=input_image,
+            config=config
+        )
+
+        # Convert numpy array to PIL Image
+        image = Image.fromarray(output_image)
+
+        return image, seed
+    except Exception as e:
+        print(f"Error in img2img: {str(e)}")
+        gr.Warning(f"Error: {str(e)}")
+        return None, seed
+
+def inpaint(
+    prompt,
+    negative_prompt,
+    seed,
+    randomize_seed,
+    width,
+    height,
+    guidance_scale,
+    num_inference_steps,
+    input_image,
+    mask_image,
+    strength,
+    progress=gr.Progress(track_tqdm=True),
+):
+    try:
+        if randomize_seed:
+            seed = random.randint(0, MAX_SEED)
+
+        if input_image is None or mask_image is None:
+            gr.Warning("Both input image and mask are required for inpainting")
+            return None, seed
+
+        # Ensure mask is in the right format
+        if mask_image.mode != "L":
+            mask_image = mask_image.convert("L")
+
+        # Update config with user settings
+        config.seed = seed
+        config.diffusion.cfg_scale = guidance_scale
+        config.diffusion.n_inference_steps = num_inference_steps
+        config.model.width = width
+        config.model.height = height
+        config.diffusion.strength = strength
+
+        # Generate image with mask
+        output_image = pipeline.generate(
+            prompt=prompt,
+            uncond_prompt=negative_prompt,
+            input_image=input_image,
+            mask_image=mask_image,
+            config=config
+        )
+
+        # Convert numpy array to PIL Image
+        image = Image.fromarray(output_image)
+
+        return image, seed
+    except Exception as e:
+        print(f"Error in inpainting: {str(e)}")
+        gr.Warning(f"Error: {str(e)}")
+        return None, seed
+
 examples = [
     "A ultra sharp photorealtici painting of a futuristic cityscape at night with neon lights and flying cars",
     "A serene mountain landscape at sunset with snow-capped peaks and a clear lake reflection",
@@ -96,31 +194,81 @@ css = """
    margin: 0 auto;
    max-width: 640px;
}
+
+.tabs {
+    margin-top: 10px;
+    margin-bottom: 10px;
+}
+
+.disclaimer {
+    font-size: 0.8em;
+    color: #666;
+    margin-top: 20px;
+}
 """
 
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown(" # LiteDiffusion")
 
-        with gr.
-        run_button = gr.Button("Run", scale=0, variant="primary")
+        with gr.Tabs(elem_classes="tabs") as tabs:
+            with gr.TabItem("Text-to-Image"):
+                txt2img_prompt = gr.Text(
+                    label="Prompt",
+                    max_lines=1,
+                    placeholder="Enter your prompt",
+                )
+                txt2img_run = gr.Button("Generate", variant="primary")
+                txt2img_result = gr.Image(label="Result")
 
+            with gr.TabItem("Image-to-Image"):
+                img2img_prompt = gr.Text(
+                    label="Prompt",
+                    max_lines=1,
+                    placeholder="Enter your prompt",
+                )
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        input_image = gr.Image(label="Input Image", type="pil")
+                        strength_slider = gr.Slider(
+                            label="Strength",
+                            minimum=0.0,
+                            maximum=1.0,
+                            step=0.01,
+                            value=0.8,
+                        )
+                        img2img_run = gr.Button("Generate", variant="primary")
+
+                    with gr.Column(scale=1):
+                        img2img_result = gr.Image(label="Result")
+
+            with gr.TabItem("Inpainting"):
+                inpaint_prompt = gr.Text(
+                    label="Prompt",
+                    max_lines=1,
+                    placeholder="Enter your prompt",
+                )
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        inpaint_image = gr.Image(label="Input Image", type="pil")
+                        inpaint_mask = gr.Image(label="Mask (White areas will be inpainted)", type="pil")
+                        inpaint_strength = gr.Slider(
+                            label="Strength",
+                            minimum=0.0,
+                            maximum=1.0,
+                            step=0.01,
+                            value=0.8,
+                        )
+                        inpaint_run = gr.Button("Generate", variant="primary")
+
+                    with gr.Column(scale=1):
+                        inpaint_result = gr.Image(label="Result")
 
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt = gr.Text(
                 label="Negative prompt",
                 max_lines=1,
                 placeholder="Enter a negative prompt",
-                visible=False,
            )
 
            seed = gr.Slider(
@@ -166,14 +314,54 @@ with gr.Blocks(css=css) as demo:
                step=1,
                value=50,
            )
-        gr.
+
+        gr.Markdown(
+            "By using LiteDiffusion, you agree to the terms in our [disclaimer](disclaimer.md).",
+            elem_classes="disclaimer"
+        )
+
+        # Example prompts for text to image
+        gr.Examples(examples=examples, inputs=[txt2img_prompt])
 
-            fn=
+        # Text-to-Image generation
+        txt2img_run.click(
+            fn=txt2img,
+            inputs=[
+                txt2img_prompt,
+                negative_prompt,
+                seed,
+                randomize_seed,
+                width,
+                height,
+                guidance_scale,
+                num_inference_steps,
+            ],
+            outputs=[txt2img_result, seed],
+        )
+
+        # Image-to-Image generation
+        img2img_run.click(
+            fn=img2img,
+            inputs=[
+                img2img_prompt,
+                negative_prompt,
+                seed,
+                randomize_seed,
+                width,
+                height,
+                guidance_scale,
+                num_inference_steps,
+                input_image,
+                strength_slider,
+            ],
+            outputs=[img2img_result, seed],
+        )
+
+        # Inpainting
+        inpaint_run.click(
+            fn=inpaint,
            inputs=[
+                inpaint_prompt,
                negative_prompt,
                seed,
                randomize_seed,
@@ -181,8 +369,11 @@ with gr.Blocks(css=css) as demo:
                height,
                guidance_scale,
                num_inference_steps,
+                inpaint_image,
+                inpaint_mask,
+                inpaint_strength,
            ],
-            outputs=[
+            outputs=[inpaint_result, seed],
        )
 
if __name__ == "__main__":
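Side note on the UI changes above: the new tab and button wiring follows the stock Gradio Blocks pattern. Below is a minimal, self-contained sketch of that pattern only; the handler, labels, and component names are placeholders for illustration, not the app's actual code.

```python
# Minimal sketch of the Blocks / Tabs / click pattern used in app.py above.
# The handler and components here are placeholders, not LiteDiffusion's real ones.
import gradio as gr

def echo(prompt):
    # Stand-in for a real generation handler that returns a result
    return f"You asked for: {prompt}"

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("Text-to-Image"):
            prompt = gr.Text(label="Prompt", max_lines=1)
            run = gr.Button("Generate", variant="primary")
            result = gr.Textbox(label="Result")
            # Clicking the button calls the handler with `inputs` and writes to `outputs`
            run.click(fn=echo, inputs=[prompt], outputs=[result])

if __name__ == "__main__":
    demo.launch()
```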
disclaimer.md
ADDED
@@ -0,0 +1,30 @@
+# Disclaimer
+
+## LiteDiffusion - Legal Disclaimer
+
+The LiteDiffusion model ("the Model") is provided by Torin Etheridge ("the Author") as-is and without warranty of any kind, express or implied.
+
+### Limitation of Liability
+
+Torin Etheridge is not responsible for any misuse of this model or any content generated using this software. Users are solely responsible for how they use the Model and any content they generate with it.
+
+### Content Generation
+
+The Model is capable of generating synthetic images based on text prompts. Users are responsible for:
+- Ensuring they have the right to generate specific content
+- Using the generated content in accordance with applicable laws and regulations
+- Not using the Model to create harmful, offensive, or illegal content
+
+### No Medical or Professional Advice
+
+Content generated by the Model should not be used for medical, legal, financial, or other professional advice.
+
+### Changes to this Disclaimer
+
+This disclaimer may be updated from time to time without notice.
+
+### Contact
+
+If you have any questions about this disclaimer, please contact the Author.
+
+**By using LiteDiffusion, you acknowledge that you have read and understood this disclaimer.**
src/__pycache__/__init__.cpython-312.pyc
DELETED
Binary file (196 Bytes)

src/__pycache__/attention.cpython-312.pyc
DELETED
Binary file (4.69 kB)

src/__pycache__/clip.cpython-312.pyc
DELETED
Binary file (4.02 kB)

src/__pycache__/config.cpython-312.pyc
DELETED
Binary file (3.4 kB)

src/__pycache__/ddpm.cpython-312.pyc
DELETED
Binary file (6.46 kB)

src/__pycache__/decoder.cpython-312.pyc
DELETED
Binary file (4.93 kB)

src/__pycache__/diffusion.cpython-312.pyc
DELETED
Binary file (14.2 kB)

src/__pycache__/encoder.cpython-312.pyc
DELETED
Binary file (2.56 kB)

src/__pycache__/model_converter.cpython-312.pyc
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cc31a7458a7d5afc6251204fd5949d56297f0e0bc97b6b307d2d70b3e2b38d97
-size 170127

src/__pycache__/model_loader.cpython-312.pyc
DELETED
Binary file (1.86 kB)

src/__pycache__/pipeline.cpython-312.pyc
DELETED
Binary file (8.11 kB)
src/pipeline.py
CHANGED
@@ -13,20 +13,6 @@ LATENTS_HEIGHT = HEIGHT // 8
 
 logging.basicConfig(level=logging.INFO)
 
-def generate(
-    prompt,
-    uncond_prompt=None,
-    input_image=None,
-    config: Config = default_config,
-):
-    with torch.no_grad():
-        validate_strength(config.diffusion.strength)
-        generator = initialize_generator(config.seed, config.device.device)
-        context = encode_prompt(prompt, uncond_prompt, config.diffusion.do_cfg, config.tokenizer, config.models["clip"], config.device.device)
-        latents = initialize_latents(input_image, config.diffusion.strength, generator, config.models, config.device.device, config.diffusion.sampler_name, config.diffusion.n_inference_steps)
-        images = run_diffusion(latents, context, config.diffusion.do_cfg, config.diffusion.cfg_scale, config.models, config.device.device, config.diffusion.sampler_name, config.diffusion.n_inference_steps, generator)
-        return postprocess_images(images)
-
 def validate_strength(strength):
     if not 0 < strength <= 1:
         raise ValueError("Strength must be between 0 and 1")
@@ -45,7 +31,7 @@ def encode_prompt(prompt, uncond_prompt, do_cfg, tokenizer, clip, device):
         cond_tokens = tokenizer.batch_encode_plus([prompt], padding="max_length", max_length=77).input_ids
         cond_tokens = torch.tensor(cond_tokens, dtype=torch.long, device=device)
         cond_context = clip(cond_tokens)
-        uncond_tokens = tokenizer.batch_encode_plus([uncond_prompt], padding="max_length", max_length=77).input_ids
+        uncond_tokens = tokenizer.batch_encode_plus([uncond_prompt or ""], padding="max_length", max_length=77).input_ids
         uncond_tokens = torch.tensor(uncond_tokens, dtype=torch.long, device=device)
         uncond_context = clip(uncond_tokens)
         context = torch.cat([cond_context, uncond_context])
@@ -55,17 +41,15 @@ def encode_prompt(prompt, uncond_prompt, do_cfg, tokenizer, clip, device):
         context = clip(tokens)
     return context
 
-def
-    latents = (1 - strength) * latents + strength * noise
-    return latents
+def rescale(x, old_range, new_range, clamp=False):
+    old_min, old_max = old_range
+    new_min, new_max = new_range
+    x -= old_min
+    x *= (new_max - new_min) / (old_max - old_min)
+    x += new_min
+    if clamp:
+        x = x.clamp(new_min, new_max)
+    return x
 
 def preprocess_image(input_image):
     input_image_tensor = input_image.resize((WIDTH, HEIGHT))
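The `rescale` helper added above is a straight linear remap from one value range to another, with optional clamping. A quick sanity check with made-up values, assuming the function is imported from src/pipeline.py as laid out in this repo:

```python
import torch

from src.pipeline import rescale  # import path assumed from this repo layout

# rescale mutates tensors in place, so clone when the original is still needed
x = torch.tensor([0.0, 127.5, 255.0])
print(rescale(x.clone(), (0, 255), (-1, 1)))
# tensor([-1.,  0.,  1.])

# Out-of-range inputs can be clamped to the target range
print(rescale(torch.tensor([300.0]), (0, 255), (-1, 1), clamp=True))
# tensor([1.])
```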
@@ -76,6 +60,51 @@ def preprocess_image(input_image):
     input_image_tensor = input_image_tensor.permute(0, 3, 1, 2)
     return input_image_tensor
 
+def encode_image(input_image, models, device):
+    # Preprocess the input image
+    image_tensor = preprocess_image(input_image).to(device)
+
+    # Encode the image using the VAE encoder
+    encoder = models["encoder"]
+    encoder.to(device)
+    with torch.no_grad():
+        # Create deterministic noise (zeros) since we want exact reconstruction
+        noise = torch.zeros((1, 4, LATENTS_WIDTH, LATENTS_HEIGHT), device=device)
+        latents = encoder(image_tensor, noise)
+
+    return latents
+
+def initialize_latents(input_image, strength, generator, models, device, sampler_name, n_inference_steps, mask_image=None):
+    if input_image is None:
+        # Initialize with random noise
+        latents = torch.randn((1, 4, LATENTS_WIDTH, LATENTS_HEIGHT), generator=generator, device=device)
+    else:
+        # Initialize with encoded input image
+        latents = encode_image(input_image, models, device)
+
+    # If mask is provided for inpainting
+    if mask_image is not None:
+        # Process mask
+        mask = mask_image.resize((WIDTH, HEIGHT))
+        mask = np.array(mask)
+        mask = torch.tensor(mask, dtype=torch.float32).to(device)
+        mask = mask / 255.0  # Normalize to 0-1
+        mask = mask.unsqueeze(0).unsqueeze(0)  # Add batch and channel dimensions
+        mask = F.interpolate(mask, (LATENTS_WIDTH, LATENTS_HEIGHT))
+        mask = mask.repeat(1, 4, 1, 1)  # Repeat for all latent channels
+
+        # Create masked noise - torch.randn_like doesn't accept generator
+        noise = torch.randn(latents.shape, device=device)
+        masked_latents = latents * (1 - mask) + noise * mask
+        latents = masked_latents
+
+    # Add noise based on strength (for img2img)
+    # torch.randn_like doesn't accept generator
+    noise = torch.randn(latents.shape, device=device)
+    latents = (1 - strength) * latents + strength * noise
+
+    return latents
+
 def get_sampler(sampler_name, generator, n_inference_steps):
     if sampler_name == "ddpm":
         sampler = DDPMSampler(generator)
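For orientation, the two blends `initialize_latents` performs above boil down to plain tensor arithmetic. A small illustration (shapes and values here are illustrative only, mirroring the formulas in the diff):

```python
import torch

latents = torch.zeros(1, 4, 64, 64)   # stands in for the encoded input image
noise = torch.randn(1, 4, 64, 64)
mask = torch.ones(1, 4, 64, 64)       # 1.0 = repaint this region, 0.0 = keep it

# Inpainting blend: keep unmasked latents, swap the masked region for noise
latents = latents * (1 - mask) + noise * mask

# img2img blend: strength=0.0 keeps the image, strength=1.0 is pure noise
strength = 0.8
latents = (1 - strength) * latents + strength * noise
```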
@@ -84,6 +113,11 @@ def get_sampler(sampler_name, generator, n_inference_steps):
         raise ValueError(f"Unknown sampler value {sampler_name}.")
     return sampler
 
+def get_time_embedding(timestep):
+    freqs = torch.pow(10000, -torch.arange(start=0, end=160, dtype=torch.float32) / 160)
+    x = torch.tensor([timestep], dtype=torch.float32)[:, None] * freqs[None]
+    return torch.cat([torch.cos(x), torch.sin(x)], dim=-1)
+
 def run_diffusion(latents, context, do_cfg, cfg_scale, models, device, sampler_name, n_inference_steps, generator):
     diffusion = models["diffusion"]
     diffusion.to(device)
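The `get_time_embedding` helper added above builds a sinusoidal timestep embedding: 160 frequencies, with cosine and sine halves concatenated into a 320-dimensional vector. A small shape check, assuming the same import path as before:

```python
import torch

from src.pipeline import get_time_embedding  # import path assumed

emb = get_time_embedding(999)
print(emb.shape)  # torch.Size([1, 320]) -- 160 cosines followed by 160 sines
print(emb.dtype)  # torch.float32
```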
@@ -108,17 +142,42 @@ def postprocess_images(images):
     images = images.to("cpu", torch.uint8).numpy()
     return images[0]
 
-def
+def generate(
+    prompt,
+    uncond_prompt=None,
+    input_image=None,
+    mask_image=None,
+    config: Config = default_config,
+):
+    with torch.no_grad():
+        # Validate inputs and parameters
+        if prompt is None or prompt.strip() == "":
+            raise ValueError("Prompt cannot be empty")
+
+        if uncond_prompt is None:
+            uncond_prompt = ""
+
+        validate_strength(config.diffusion.strength)
+
+        # Initialize generator for reproducibility
+        generator = initialize_generator(config.seed, config.device.device)
+
+        # Encode text prompt
+        context = encode_prompt(prompt, uncond_prompt, config.diffusion.do_cfg,
+                                config.tokenizer, config.models["clip"], config.device.device)
+
+        # Initialize latents (either from noise or from input image)
+        latents = initialize_latents(input_image, config.diffusion.strength, generator,
+                                     config.models, config.device.device,
+                                     config.diffusion.sampler_name,
+                                     config.diffusion.n_inference_steps,
+                                     mask_image)
+
+        # Run diffusion process
+        images = run_diffusion(latents, context, config.diffusion.do_cfg,
+                               config.diffusion.cfg_scale, config.models,
+                               config.device.device, config.diffusion.sampler_name,
+                               config.diffusion.n_inference_steps, generator)
+
+        # Post-process and return the images
+        return postprocess_images(images)
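Finally, a minimal sketch of calling the reworked `generate` entry point for inpainting. It assumes the import paths implied by this repo layout and that `config` is the already-initialized Config that app.py builds (tokenizer, models, and device set up via model_loader.load_models); the file paths are hypothetical.

```python
from PIL import Image

from src import pipeline                           # import path assumed from this repo layout
from src.config import default_config as config   # assumed location; the real app also fills config.models first

init_image = Image.open("photo.png").convert("RGB")  # hypothetical input file
mask_image = Image.open("mask.png").convert("L")     # white areas get repainted

output = pipeline.generate(
    prompt="replace the sky with a dramatic sunset",
    uncond_prompt="",
    input_image=init_image,
    mask_image=mask_image,
    config=config,
)
Image.fromarray(output).save("inpainted.png")
```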