import os

download_repo_loc = "./models/image_encoder/"
os.system("pip install -U peft")
# os.system(f"wget -O {download_repo_loc}config.json https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/config.json?download=true")
# os.system(f"wget -O {download_repo_loc}model.safetensors https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors?download=true")
# os.system(f"wget -O {download_repo_loc}pytorch_model.bin https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/pytorch_model.bin?download=true")

import spaces
import gradio as gr
import torch
import numpy as np
import cv2
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, StableDiffusionXLPipeline
from PIL import Image
from ip_adapter import IPAdapterXL
base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
device = "cuda"


image_encoder_path = download_repo_loc  # "sdxl_models/image_encoder"
ip_ckpt = "./models/ip-adapter_sdxl.bin"
# load SDXL pipeline
pipe = StableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    add_watermarker=False,
)


controlnet_path = "diffusers/controlnet-canny-sdxl-1.0"
controlnet = ControlNetModel.from_pretrained(controlnet_path, use_safetensors=False, torch_dtype=torch.float16).to(device)

controlnet_pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    base_model_path,
    controlnet=controlnet,
    torch_dtype=torch.float16,
    add_watermarker=False,
)
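# The plain SDXL pipeline drives the image-prompt tab below, while the
# Canny-conditioned ControlNet pipeline drives the image stylization tab.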



@spaces.GPU(enable_queue=True)
def create_image_controlnet(image_pil, input_image, prompt, n_prompt, scale, control_scale, guidance_scale, num_samples, num_inference_steps, seed):
    # load the IP-Adapter onto the ControlNet pipeline with style+layout target blocks
    ip_model = IPAdapterXL(controlnet_pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"])

    image_pil = image_pil.resize((512, 512))
    cv_input_image = pil_to_cv2(input_image)
    detected_map = cv2.Canny(cv_input_image, 50, 200)
    canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB))
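    # Canny edges from the source image act as the structural condition; the 50/200
    # thresholds above are the demo defaults and can be tuned for finer or coarser edges.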

    images = ip_model.generate(pil_image=image_pil,
                            prompt=prompt,
                            negative_prompt=n_prompt,
                            scale=scale,
                            guidance_scale=guidance_scale,
                            num_samples=num_samples,
                            num_inference_steps=num_inference_steps, 
                            seed=seed,
                            image=canny_map,
                            controlnet_conditioning_scale=control_scale,
                            )
    del ip_model

    return images

def pil_to_cv2(image_pil):
    # convert a PIL RGB image to an OpenCV BGR ndarray
    image_np = np.array(image_pil)
    image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    return image_cv2


# generate image variations with only image prompt
@spaces.GPU(enable_queue=True)
def create_image(image_pil, target, prompt, n_prompt, scale, guidance_scale, num_samples, num_inference_steps, seed):
    # load the IP-Adapter with the requested attention target blocks
    if target == "Load original IP-Adapter":
        # target_blocks=["blocks"] injects the image prompt into every attention block (original IP-Adapter)
        ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["blocks"])
    elif target == "Load only style blocks":
        # target_blocks=["up_blocks.0.attentions.1"] injects into the style blocks only
        ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1"])
    elif target == "Load style+layout block":
        # target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] injects into style and layout blocks
        ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"])
    else:
        raise gr.Error("Please select an IP-Adapter loading mode.")

    image_pil = image_pil.resize((512, 512))
    images = ip_model.generate(pil_image=image_pil,
                            prompt=prompt,
                            negative_prompt=n_prompt,
                            scale=scale,
                            guidance_scale=guidance_scale,
                            num_samples=num_samples,
                            num_inference_steps=num_inference_steps, 
                            seed=seed,
                            #neg_content_prompt="a rabbit",
                            #neg_content_scale=0.5,
                            )

    # images[0].save("result.png")    
    del ip_model
    
    return images
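
# A minimal sketch of calling create_image outside Gradio (hypothetical paths/values):
#   from PIL import Image
#   style = Image.open("style.png")  # placeholder style image
#   images = create_image(style, "Load only style blocks", "a cat", "", 1.0, 5.0, 1, 30, 42)
#   images[0].save("result.png")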


DESCRIPTION = """
# InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation
**Demo by Ameer Azam - [Twitter](https://twitter.com/Ameerazam18) - [GitHub](https://github.com/AMEERAZAM08) - [Hugging Face](https://huggingface.co/ameerazam08)**
This is a demo of https://github.com/InstantStyle/InstantStyle.
"""

block = gr.Blocks(css="footer {visibility: hidden}").queue(max_size=10)
with block:
    with gr.Tab("Instant Syle"):
        with gr.Row():
       
            with gr.Column():
                # gr.Markdown("## <h1 align='center'>InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation  </h1>")
                gr.Markdown(DESCRIPTION)
        with gr.Row():
            with gr.Column():
                image_pil = gr.Image(label="Style Image", type='pil')
                target = gr.Dropdown(["Load original IP-Adapter", "Load only style blocks", "Load style+layout block"], label="Load Style", info="IP-Adapter Layers")
                prompt = gr.Textbox(label="Prompt", value="a cat, masterpiece, best quality, high quality")
                n_prompt = gr.Textbox(label="Neg Prompt", value="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry")
                scale = gr.Slider(minimum=0, maximum=2.0, step=0.01, value=1.0, label="scale")
                guidance_scale = gr.Slider(minimum=1, maximum=15.0, step=0.01, value=5.0, label="guidance_scale")
                num_samples = gr.Slider(minimum=1, maximum=3, step=1, value=1, label="num_samples")
                num_inference_steps = gr.Slider(minimum=5, maximum=50, step=1, value=30, label="num_inference_steps")
                seed = gr.Slider(minimum=-1000000, maximum=1000000, value=1, step=1, label="Seed Value")
                generate_button = gr.Button("Generate Image")
            with gr.Column():
                generated_image = gr.Gallery(label="Generated Image")

        generate_button.click(fn=create_image,
                              inputs=[image_pil, target, prompt, n_prompt, scale, guidance_scale, num_samples, num_inference_steps, seed],
                              outputs=[generated_image])
    with gr.Tab("Image stylization Style"):
        with gr.Row():

            with gr.Column():
                gr.Markdown("""
                    # Imagestylization-Style: Free Lunch towards Style-Preserving in Text-to-Image Generation
                    **Demo by [ameer azam] - [Twitter](https://twitter.com/Ameerazam18) - [GitHub](https://github.com/AMEERAZAM08)) - [Hugging Face](https://huggingface.co/ameerazam08)**
                    This is a demo of  https://github.com/InstantStyle/InstantStyle.
                    """)
                with gr.Row():
                    with gr.Column():
                        src_image_pil = gr.Image(label="Source Image", type='pil')
                    with gr.Column():
                        image_pil = gr.Image(label="Style Image", type='pil')
                prompt = gr.Textbox(label="Prompt", value="masterpiece, best quality, high quality")
                n_prompt = gr.Textbox(label="Neg Prompt", value="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry")
                scale = gr.Slider(minimum=0, maximum=2.0, step=0.01, value=1.0, label="scale")
                control_scale = gr.Slider(minimum=0, maximum=1.0, step=0.01, value=0.6, label="controlnet conditioning scale")
                guidance_scale = gr.Slider(minimum=1, maximum=15.0, step=0.01, value=5.0, label="guidance scale")
                num_samples = gr.Slider(minimum=1, maximum=4, step=1, value=1, label="num samples")
                num_inference_steps = gr.Slider(minimum=5, maximum=50, step=1, value=30, label="num inference steps")
                seed = gr.Slider(minimum=-1000000, maximum=1000000, value=1, step=1, label="Seed Value")
                generate_button = gr.Button("Generate Image")
            with gr.Column():
                generated_image = gr.Gallery(label="Generated Image")

        generate_button.click(fn=create_image_controlnet,
                              inputs=[image_pil, src_image_pil, prompt, n_prompt, scale, control_scale, guidance_scale, num_samples, num_inference_steps, seed],
                              outputs=[generated_image])

block.launch()