multiview-incontext

Running on Zero

App Files Files Community

multimodalart HF staff committed on Nov 20

Commit

f27dee7

•

1 Parent(s): 85875c3

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -55

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import torch
 import spaces
 from diffusers import FluxInpaintPipeline
 from PIL import Image, ImageFile
-import numpy as np
 #ImageFile.LOAD_TRUNCATED_IMAGES = True
@@ -18,57 +17,47 @@ pipe.load_lora_weights(
     weight_name="visual-identity-design.safetensors"
 )
-def square_center_crop_numpy(img, target_size=768):
     # Old (pre-commit) version: center-crop `img` to a square whose side is
     # the shorter image dimension, then resize it to
     # target_size x target_size using LANCZOS resampling.
     # Images in mode 'RGBA' or 'P' are converted to 'RGB' first; other modes
     # are passed through unchanged.
     if img.mode in ('RGBA', 'P'):
         img = img.convert('RGB')
-    # Convert PIL image to numpy array
-    img_array = np.array(img)
-    # Get dimensions
-    height, width = img_array.shape[:2]
     crop_size = min(width, height)
-    # Calculate crop coordinates
     # Integer division centers the crop window; any odd leftover pixel ends
     # up on the right/bottom side.
     left = (width - crop_size) // 2
     top = (height - crop_size) // 2
-    # Perform the crop on numpy array
-    img_cropped = img_array[top:top+crop_size, left:left+crop_size]
-    # Convert back to PIL and resize
-    img_pil = Image.fromarray(img_cropped)
-    return img_pil.resize((target_size, target_size), Image.Resampling.LANCZOS)
 def duplicate_horizontally(img):
     # Old (pre-commit) version: return a new image consisting of two copies
     # of `img` placed side by side (the result is twice as wide).
     # Raises ValueError when `img` is not square.
-    # Convert PIL Image to numpy array
     width, height = img.size
     if width != height:
         raise ValueError(f"Input image must be square, got {width}x{height}")
-    img_array = np.array(img)
     # axis=1 joins the two copies along the width (column) axis.
-    duplicated = np.concatenate([img_array, img_array], axis=1)
-    return Image.fromarray(duplicated)
 # Load the mask image
 mask = Image.open("mask_square.png")
-def crop_input(image):
     # Old (pre-commit) helper: returns the result of square_center_crop(image).
     # NOTE(review): the only crop helper visible on the old side of this diff
     # is named `square_center_crop_numpy`; no `square_center_crop` definition
     # is shown here -- confirm against the full pre-commit file.
-    cropped_image = square_center_crop(image)
-    return cropped_image
 @spaces.GPU
 def generate(image, prompt_user, progress=gr.Progress(track_tqdm=True)):
     prompt_structure = "The two-panel image showcases the logo of a brand, [LEFT] the left panel is showing the logo [RIGHT] the right panel has this logo applied to "
     prompt = prompt_structure + prompt_user
-    print(image)
-    image = duplicate_horizontally(image)
     out = pipe(
         prompt=prompt,
-        image=image,
         mask_image=mask,
-        guidance_scale=3.75,
         height=768,
         width=1536,
         num_inference_steps=28,
@@ -89,34 +78,19 @@ with gr.Blocks() as demo:
         with gr.Column():
             input_image = gr.Image(
                 label="Upload Logo Image",
-                type="pil"
-            )
-            cropped_image = gr.Image(
-                visible=False,
-                type="pil"
             )
             prompt_input = gr.Textbox(
                 label="Where should the logo be applied?",
-                placeholder="e.g., a coffee cup on a wooden table"
             )
             generate_btn = gr.Button("Generate Application", variant="primary")
         with gr.Column():
             output_image = gr.Image(label="Generated Application")
             output_side = gr.Image(label="Side by side")
-    gr.Examples(
-        examples=[
-            ["huggingface.png", "A hat"],
-            ["awesome.png", "A tattoo on a leg"],
-            ["dvd_logo.png", "a flower pot"]
-        ],
-        inputs=[input_image, prompt_input],
-        outputs=[output_image, output_side],
-        fn=generate,
-        cache_examples="lazy"
-    )
     with gr.Row():
         gr.Markdown("""
         ### Instructions:
@@ -129,12 +103,8 @@ with gr.Blocks() as demo:
     # Set up the click event
     generate_btn.click(
-        fn=crop_input,
-        inputs=[input_image],
-        outputs=[cropped_image]
-    ).then(
         fn=generate,
-        inputs=[cropped_image, prompt_input],
         outputs=[output_image, output_side]
     )

 import spaces
 from diffusers import FluxInpaintPipeline
 from PIL import Image, ImageFile
 #ImageFile.LOAD_TRUNCATED_IMAGES = True
     weight_name="visual-identity-design.safetensors"
 )
+def square_center_crop(img, target_size=768):
     # Center-crop `img` to a square whose side is the shorter image
     # dimension, then resize it to target_size x target_size using LANCZOS
     # resampling. Returns the resized image.
     # Images in mode 'RGBA' or 'P' are converted to 'RGB' first; other modes
     # are passed through unchanged.
     if img.mode in ('RGBA', 'P'):
         img = img.convert('RGB')
+    width, height = img.size
     crop_size = min(width, height)
     # Integer division centers the crop window; any odd leftover pixel ends
     # up on the right/bottom side.
     left = (width - crop_size) // 2
     top = (height - crop_size) // 2
+    right = left + crop_size
+    bottom = top + crop_size
     # The crop box is passed as (left, top, right, bottom).
+    img_cropped = img.crop((left, top, right, bottom))
+    return img_cropped.resize((target_size, target_size), Image.Resampling.LANCZOS)
 def duplicate_horizontally(img):
     # Return a new image of size (2 * width, height) holding two copies of
     # `img` side by side. Raises ValueError when `img` is not square.
     width, height = img.size
     if width != height:
         raise ValueError(f"Input image must be square, got {width}x{height}")
     # The canvas is always created in 'RGB' mode, whatever mode `img` has.
+    new_image = Image.new('RGB', (width * 2, height))
     # First copy fills the left half, second copy starts at x == width.
+    new_image.paste(img, (0, 0))
+    new_image.paste(img, (width, 0))
+    return new_image
 # Load the mask image
 mask = Image.open("mask_square.png")
 @spaces.GPU
 def generate(image, prompt_user, progress=gr.Progress(track_tqdm=True)):
     prompt_structure = "The two-panel image showcases the logo of a brand, [LEFT] the left panel is showing the logo [RIGHT] the right panel has this logo applied to "
     prompt = prompt_structure + prompt_user
+    cropped_image = square_center_crop(image)
+    logo_dupli = duplicate_horizontally(cropped_image)
     out = pipe(
         prompt=prompt,
+        image=logo_dupli,
         mask_image=mask,
+        guidance_scale=6,
         height=768,
         width=1536,
         num_inference_steps=28,
         with gr.Column():
             input_image = gr.Image(
                 label="Upload Logo Image",
+                type="pil",
+                height=384
             )
             prompt_input = gr.Textbox(
                 label="Where should the logo be applied?",
+                placeholder="e.g., a coffee cup on a wooden table",
+                lines=2
             )
             generate_btn = gr.Button("Generate Application", variant="primary")
         with gr.Column():
             output_image = gr.Image(label="Generated Application")
             output_side = gr.Image(label="Side by side")
     with gr.Row():
         gr.Markdown("""
         ### Instructions:
     # Set up the click event
     generate_btn.click(
         fn=generate,
+        inputs=[input_image, prompt_input],
         outputs=[output_image, output_side]
     )