Spaces:

annie08
/

Text-or-Image-to-Image-Scratch-Implementation

Running

App Files Files Community

anushka81 committed on Dec 19, 2024

Commit

f48676a

1 Parent(s): d13ee8a

sd for i2i and t2i

Browse files

Files changed (1) hide show

app.py +34 -42

app.py CHANGED Viewed

@@ -1,37 +1,24 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionPipeline
-from torchvision.models.segmentation import fcn_resnet50
-from torchvision.transforms import Compose, ToTensor, Normalize, Resize, ToPILImage
 from PIL import Image
 # Device configuration
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load Stable Diffusion for text-to-image
 text_to_image_pipe = StableDiffusionPipeline.from_pretrained(
     "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32
 ).to(device)
-# Load a pre-trained FCN model for image-to-image transformations
-unet_model = fcn_resnet50(pretrained=True).eval().to(device)
-# Transforms for UNet
-preprocess = Compose([
-    Resize((512, 512)),
-    ToTensor(),
-    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-])
-postprocess = Compose([
-    ToPILImage(),
-])
 # Function for Text-to-Image
 def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
     image = text_to_image_pipe(
-        prompt,
         negative_prompt=negative_prompt,
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
@@ -39,26 +26,26 @@ def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
     return image
-# Function for Image-to-Image using Dynamic UNet
-def apply_dynamic_unet(init_image, prompt, strength):
-    # Placeholder for prompt-based logic
-    print(f"Received prompt: {prompt}")  # You can define prompt-based rules here.
-    with torch.no_grad():
-        image_tensor = preprocess(init_image).unsqueeze(0).to(device)
-        output = unet_model(image_tensor)["out"][0]
-        output = torch.softmax(output, dim=0)  # Normalize predictions
-        mask = output.argmax(dim=0).float().cpu()
-        blended = (strength * mask.unsqueeze(0) + (1 - strength) * image_tensor[0].cpu()).clamp(0, 1)
-        blended_image = postprocess(blended)
-    return blended_image
 # Gradio Interface
-with gr.Blocks(theme='Respair/[email protected]') as demo:
-    gr.Markdown("# Text-to-Image and Image-to-Image ")
     with gr.Tab("Text-to-Image"):
         with gr.Row():
             text_prompt = gr.Textbox(label="Prompt", placeholder="Enter your text here...")
             text_negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter what to avoid...")
@@ -76,23 +63,28 @@ with gr.Blocks(theme='Respair/[email protected]') as demo:
             outputs=text_output,
         )
-    # Gradio Tab with Prompt
     with gr.Tab("Image-to-Image"):
         gr.Markdown(
-            "**Transform uploaded images using a dynamic UNet model.**\n"
-            "Provide a prompt to describe the transformation and use the `Blend Strength` slider to adjust blending."
         )
         with gr.Row():
             init_image = gr.Image(type="pil", label="Upload Initial Image")
         with gr.Row():
-            img_prompt = gr.Textbox(label="Prompt", placeholder="Describe the transformation (optional)...")
         with gr.Row():
-            strength = gr.Slider(0.1, 1.0, value=0.75, step=0.05, label="Blend Strength")
         with gr.Row():
-            img_generate_btn = gr.Button("Apply UNet", elem_classes=["primary-button"])
         with gr.Row():
             img_output = gr.Image(label="Modified Image")
-        img_generate_btn.click(apply_dynamic_unet, inputs=[init_image, img_prompt, strength], outputs=img_output)
 demo.launch(share=True)

 import gradio as gr
 import torch
+from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
 from PIL import Image
 # Device configuration
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load Stable Diffusion pipelines
 text_to_image_pipe = StableDiffusionPipeline.from_pretrained(
     "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32
 ).to(device)
+image_to_image_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32
+).to(device)
 # Function for Text-to-Image
 def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
     image = text_to_image_pipe(
+        prompt=prompt,
         negative_prompt=negative_prompt,
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
     return image
+# Function for Image-to-Image
+def image_to_image(prompt, negative_prompt, init_image, strength, guidance_scale, num_inference_steps):
+    init_image = init_image.convert("RGB").resize((512, 512))  # Ensure the image is resized
+    image = image_to_image_pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        init_image=init_image,
+        strength=strength,
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps,
+    ).images[0]
+    return image
 # Gradio Interface
+with gr.Blocks(theme='NoCrypt/miku') as demo:
+    gr.Markdown("# Text-to-Image and Image-to-Image generation")
     with gr.Tab("Text-to-Image"):
+        gr.Markdown("**Generate images from text prompts **")
         with gr.Row():
             text_prompt = gr.Textbox(label="Prompt", placeholder="Enter your text here...")
             text_negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter what to avoid...")
             outputs=text_output,
         )
     with gr.Tab("Image-to-Image"):
         gr.Markdown(
+            "**Modify images - Upload an image, provide a prompt describing the transformation, and adjust settings for desired results."
         )
         with gr.Row():
             init_image = gr.Image(type="pil", label="Upload Initial Image")
         with gr.Row():
+            img_prompt = gr.Textbox(label="Prompt", placeholder="Describe modifications...")
+            img_negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter what to avoid...")
         with gr.Row():
+            strength = gr.Slider(0.1, 1.0, value=0.75, step=0.05, label="Strength")
+            img_guidance_scale = gr.Slider(1, 20, value=7.5, step=0.1, label="Guidance Scale")
+            img_num_inference_steps = gr.Slider(10, 100, value=50, step=1, label="Inference Steps")
         with gr.Row():
+            img_generate_btn = gr.Button("Generate", elem_classes=["primary-button"])
         with gr.Row():
             img_output = gr.Image(label="Modified Image")
+        img_generate_btn.click(
+            image_to_image,
+            inputs=[img_prompt, img_negative_prompt, init_image, strength, img_guidance_scale, img_num_inference_steps],
+            outputs=img_output,
+        )
 demo.launch(share=True)