File size: 6,807 Bytes
079a382
 
 
 
b4f042d
6c51e38
53e6930
079a382
 
 
 
 
 
 
 
 
 
 
 
f27dee7
b4f042d
 
f27dee7
 
b4f042d
f27dee7
b4f042d
 
f27dee7
 
 
 
 
079a382
 
b4f042d
 
 
f27dee7
 
 
 
 
b4f042d
 
 
079a382
 
8a37172
 
913fdfc
f27dee7
 
 
 
b4f042d
 
f27dee7
b4f042d
3609460
b4f042d
 
 
 
 
 
 
 
 
 
ced387c
b4f042d
079a382
 
d6b75b6
daa5f41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9e83e2
daa5f41
 
 
 
 
 
 
 
c382020
daa5f41
 
 
 
 
 
 
 
 
 
 
 
 
e9e83e2
daa5f41
 
 
70db990
daa5f41
 
70db990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
daa5f41
70db990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
daa5f41
 
b4f042d
079a382
b7d4359
8a37172
b4f042d
079a382
 
ced387c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import gradio as gr
import torch
import spaces
from diffusers import FluxInpaintPipeline
from PIL import Image, ImageFile

#ImageFile.LOAD_TRUNCATED_IMAGES = True

# Identifiers of the base FLUX checkpoint and the In-Context LoRA adapter.
_BASE_MODEL_ID = "black-forest-labs/FLUX.1-dev"
_LORA_REPO_ID = "ali-vilab/In-Context-LoRA"
_LORA_WEIGHT_FILE = "visual-identity-design.safetensors"

# Build the inpainting pipeline in bfloat16, move it to the GPU, and only
# then attach the LoRA weights (same order as before).
pipe = FluxInpaintPipeline.from_pretrained(_BASE_MODEL_ID, torch_dtype=torch.bfloat16)
pipe.to("cuda")
pipe.load_lora_weights(_LORA_REPO_ID, weight_name=_LORA_WEIGHT_FILE)

def square_center_crop(img, target_size=768):
    """Return the centered square region of *img*, resized to a fixed size.

    Images in mode ``RGBA`` or ``P`` are converted to ``RGB`` first; other
    modes are passed through unchanged.

    Args:
        img: Source PIL image.
        target_size: Edge length, in pixels, of the square result.

    Returns:
        A ``target_size`` x ``target_size`` image resampled with LANCZOS.
    """
    needs_rgb = img.mode == 'RGBA' or img.mode == 'P'
    source = img.convert('RGB') if needs_rgb else img

    src_w, src_h = source.size
    side = min(src_w, src_h)

    # Center the square along whichever axis is longer.
    x0 = (src_w - side) // 2
    y0 = (src_h - side) // 2
    box = (x0, y0, x0 + side, y0 + side)

    square = source.crop(box)
    return square.resize((target_size, target_size), Image.Resampling.LANCZOS)

def duplicate_horizontally(img):
    """Return a new RGB image holding two side-by-side copies of *img*.

    Args:
        img: A square PIL image.

    Returns:
        An RGB image twice as wide as *img* and of the same height, with
        *img* pasted into the left half and again into the right half.

    Raises:
        ValueError: If *img* is not square.
    """
    side, height = img.size
    if side != height:
        raise ValueError(f"Input image must be square, got {side}x{height}")

    canvas = Image.new('RGB', (2 * side, height))
    for x_offset in (0, side):
        canvas.paste(img, (x_offset, 0))
    return canvas

# Load the inpainting mask once at import time.
# Image.open() is lazy and keeps the underlying file open until the pixel
# data is first read, so read it eagerly inside a context manager: the file
# handle is closed deterministically and `mask` stays usable afterwards.
with Image.open("mask_square.png") as mask:
    mask.load()

@spaces.GPU
def generate(image, prompt_description, prompt_user, progress=gr.Progress(track_tqdm=True)):
    """Render the uploaded logo applied to the scene described by the user.

    The logo is center-cropped to a square, duplicated into a two-panel
    canvas, and passed to the inpainting pipeline together with the
    module-level ``mask``.

    Args:
        image: Logo as a PIL image, or ``None`` when nothing was uploaded.
        prompt_description: Text describing the logo (left panel).
        prompt_user: Text describing what the logo is applied to (right panel).
        progress: Gradio progress tracker. Not referenced in the body; it is
            declared with ``track_tqdm=True`` as a default argument.

    Returns:
        A ``(right_panel, full_output)`` tuple: the right half of the
        generated image and the complete two-panel image.

    Raises:
        gr.Error: If no logo image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of an AttributeError
        # from the cropping helper.
        raise gr.Error("Please upload a logo image before generating.")

    # Treat missing text as empty so string formatting cannot inject "None".
    description = prompt_description or ""
    application = prompt_user or ""
    prompt = (
        "The two-panel image showcases the logo on the left and the application on the right, "
        f"[LEFT] the left panel is showing {description} "
        f"[RIGHT] this logo is applied to {application}"
    )

    cropped_image = square_center_crop(image)
    logo_dupli = duplicate_horizontally(cropped_image)

    # Take the generation size from the prepared canvas (1536x768 with the
    # default crop size) rather than repeating the literals.
    canvas_width, canvas_height = logo_dupli.size

    out = pipe(
        prompt=prompt,
        image=logo_dupli,
        mask_image=mask,
        guidance_scale=3.5,
        height=canvas_height,
        width=canvas_width,
        num_inference_steps=28,
        max_sequence_length=256,
        strength=1
    ).images[0]

    # Return the right half (the generated application) plus the full image.
    width, height = out.size
    half_width = width // 2
    image_2 = out.crop((half_width, 0, width, height))
    return image_2, out

# Gradio UI: a "Demo" tab with the interactive generator and a second tab
# showing a standalone diffusers version of the same idea.
with gr.Blocks() as demo:
    gr.Markdown("# Logo in Context")
    gr.Markdown("### [In-Context LoRA](https://huggingface.co/ali-vilab/In-Context-LoRA) + Image-to-Image + Inpainting, apply your logo to anything. diffusers implementation based on the [workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779)")

    with gr.Tab("Demo"):
        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                input_image = gr.Image(
                    label="Upload Logo Image",
                    type="pil",
                    height=384
                )
                prompt_description = gr.Textbox(
                    label="Describe your logo",
                    placeholder="A Hugging Face emoji logo",
                )
                prompt_input = gr.Textbox(
                    label="Where should the logo be applied?",
                    placeholder="e.g., a coffee cup on a wooden table"
                )
                generate_btn = gr.Button("Generate Application", variant="primary")

            # Right column: outputs.
            with gr.Column():
                output_image = gr.Image(label="Generated Application")
                output_side = gr.Image(label="Side by side")

        gr.Examples(
            examples=[
                ["huggingface.png", "A Hugging Face emoji logo", "An embroidered hat"],
                ["awesome.png", "An awesome face logo", "A tattoo on a leg"],
                ["dvd_logo.png", "A DVD logo", "a coconut, engraved logo on a green coconut"]
            ],
            inputs=[input_image, prompt_description, prompt_input],
            outputs=[output_image, output_side],
            fn=generate,
            cache_examples="lazy"
        )

        with gr.Row():
            # The steps mirror the three inputs above plus the button.
            gr.Markdown("""
            ### Instructions:
            1. Upload a logo image (preferably square)
            2. Describe your logo
            3. Describe where you'd like to see the logo applied
            4. Click 'Generate Application' and wait for the result
    
            Note: The generation process might take a few moments.
            """)

    with gr.Tab("🧨 diffusers implementation"):
        gr.Markdown("The way this works is combining the [IC LoRA](https://github.com/ali-vilab/In-Context-LoRA) with image-to-image + inpainting. Where the image on the left (the logo) is uploaded by the user, and the image on the right is masked and applied on the product by the LoRA. Based on the [ComfyUI workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779). Below is a diffusers implementation of the idea")

        # Displayed snippet 1: the image helper functions.
        gr.Code(language="python", value="""# Support functions
def square_center_crop(img, target_size=768):
    if img.mode in ('RGBA', 'P'):
        img = img.convert('RGB')
    
    width, height = img.size
    crop_size = min(width, height)
    
    left = (width - crop_size) // 2
    top = (height - crop_size) // 2
    right = left + crop_size
    bottom = top + crop_size
    
    img_cropped = img.crop((left, top, right, bottom))
    return img_cropped.resize((target_size, target_size), Image.Resampling.LANCZOS)

def duplicate_horizontally(img):
    width, height = img.size
    if width != height:
        raise ValueError(f"Input image must be square, got {width}x{height}")
    
    new_image = Image.new('RGB', (width * 2, height))
    new_image.paste(img, (0, 0))
    new_image.paste(img, (width, 0))
    return new_image"""
        )

        # Displayed snippet 2: pipeline setup and inference. It imports
        # load_image so the snippet runs when copied as-is.
        gr.Code(language="python", value="""import torch
from diffusers import FluxInpaintPipeline
from diffusers.utils import load_image
from PIL import Image

pipe = FluxInpaintPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
pipe.to("cuda")
pipe.load_lora_weights("ali-vilab/In-Context-LoRA", weight_name="visual-identity-design.safetensors")

mask = load_image("mask_square.png")
image = load_image("the_logo.png")
cropped_image = square_center_crop(image) #crop the image you upload to square
logo_dupli = duplicate_horizontally(cropped_image) #duplicate it so the right side can be masked

prompt_structure = "The two-panel image showcases the logo of a brand, [LEFT] the left panel is showing the logo [RIGHT] the right panel has this logo applied to "
prompt = prompt_structure + "a coconut, engraved logo on a green coconut"
out = pipe(
    prompt=prompt,
    image=logo_dupli,
    mask_image=mask,
    guidance_scale=6,
    height=768,
    width=1536,
    num_inference_steps=28,
    max_sequence_length=256,
    strength=1
).images[0]"""
        )

    # Set up the click event
    generate_btn.click(
        fn=generate,
        inputs=[input_image, prompt_description, prompt_input],
        outputs=[output_image, output_side]
    )

demo.launch()