anushka81 committed on
Commit bad655a · 1 Parent(s): 54c658a
Files changed (2):
  1. app.py +57 -20
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,21 +1,59 @@
 import gradio as gr
-from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
 import torch
+from diffusers import StableDiffusionPipeline
+from torchvision.models.segmentation import fcn_resnet50
+from torchvision.transforms import Compose, ToTensor, Normalize, Resize, ToPILImage
 from PIL import Image
 
+# Device configuration
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Load Stable Diffusion for text-to-image
+text_to_image_pipe = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 if device == "cuda" else torch.float32
+).to(device)
+
+# Load a pre-trained FCN model for image-to-image transformations
+unet_model = fcn_resnet50(pretrained=True).eval().to(device)
+
+# Transforms for the FCN model
+preprocess = Compose([
+    Resize((512, 512)),
+    ToTensor(),
+    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])
+
+postprocess = Compose([
+    ToPILImage(),
+])
+
+
+# Function for Text-to-Image
 def text_to_image(prompt, negative_prompt, guidance_scale, num_inference_steps):
-    pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
-    image = pipe(prompt, negative_prompt=negative_prompt, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps).images[0]
+    image = text_to_image_pipe(
+        prompt,
+        negative_prompt=negative_prompt,
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps,
+    ).images[0]
     return image
 
-def image_to_image(prompt, negative_prompt, init_image, strength, guidance_scale, num_inference_steps):
-    pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
-    init_image = init_image.convert("RGB").resize((512, 512))
-    image = pipe(prompt, negative_prompt=negative_prompt, init_image=init_image, strength=strength, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps).images[0]
-    return image
 
+# Function for Image-to-Image using the pre-trained FCN model
+def apply_dynamic_unet(init_image, strength):
+    with torch.no_grad():
+        image_tensor = preprocess(init_image).unsqueeze(0).to(device)
+        output = unet_model(image_tensor)["out"][0]
+        output = torch.softmax(output, dim=0)  # Normalize predictions
+        mask = output.argmax(dim=0).float().cpu()
+        blended = (strength * mask.unsqueeze(0) + (1 - strength) * image_tensor[0].cpu()).clamp(0, 1)
+        blended_image = postprocess(blended)
+    return blended_image
+
+
+# Gradio Interface
 with gr.Blocks(theme='Respair/[email protected]') as demo:
-    gr.Markdown("# Text-to-Image and Image-to-Image")
+    gr.Markdown("# Text-to-Image and Image-to-Image ")
 
     with gr.Tab("Text-to-Image"):
         with gr.Row():
@@ -28,24 +66,23 @@ with gr.Blocks(theme='Respair/[email protected]') as demo:
             generate_btn = gr.Button("Generate", elem_classes=["primary-button"])
         with gr.Row():
             text_output = gr.Image(label="Generated Image")
-
-        generate_btn.click(text_to_image, inputs=[text_prompt, text_negative_prompt, guidance_scale, num_inference_steps], outputs=text_output)
+
+        generate_btn.click(
+            text_to_image,
+            inputs=[text_prompt, text_negative_prompt, guidance_scale, num_inference_steps],
+            outputs=text_output,
+        )
 
     with gr.Tab("Image-to-Image"):
         with gr.Row():
-            init_image = gr.Image(type="pil", tool="editor", label="Initial Image")
-        with gr.Row():
-            img_prompt = gr.Textbox(label="Prompt", placeholder="Describe modifications...")
-            img_negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter what to avoid...")
+            init_image = gr.Image(type="pil", label="Upload Initial Image")
         with gr.Row():
-            strength = gr.Slider(0.1, 1.0, value=0.75, step=0.05, label="Strength")
-            img_guidance_scale = gr.Slider(1, 20, value=7.5, step=0.1, label="Guidance Scale")
-            img_num_inference_steps = gr.Slider(10, 100, value=50, step=1, label="Inference Steps")
+            strength = gr.Slider(0.1, 1.0, value=0.75, step=0.05, label="Blend Strength")
         with gr.Row():
-            img_generate_btn = gr.Button("Generate", elem_classes=["primary-button"])
+            img_generate_btn = gr.Button("Apply UNet", elem_classes=["primary-button"])
         with gr.Row():
            img_output = gr.Image(label="Modified Image")
 
-        img_generate_btn.click(image_to_image, inputs=[img_prompt, img_negative_prompt, init_image, strength, img_guidance_scale, img_num_inference_steps], outputs=img_output)
+        img_generate_btn.click(apply_dynamic_unet, inputs=[init_image, strength], outputs=img_output)
 
 demo.launch(share=True)
requirements.txt CHANGED
@@ -1,7 +1,6 @@
 gradio
 torch
+torchvision
 diffusers
 transformers
-accelerate
-huggingface_hub
 Pillow