File size: 7,334 Bytes
52b67df
f92c162
 
52b67df
 
d5bcc1a
52b67df
 
6ef1dc4
 
52b67df
 
f92c162
ae6a57b
 
 
 
 
f92c162
52b67df
 
 
 
 
 
 
 
 
 
d5bcc1a
 
 
 
 
 
 
 
 
52b67df
 
 
 
 
 
 
 
 
 
d5bcc1a
 
52b67df
d5bcc1a
f92c162
 
52b67df
f92c162
d5bcc1a
f92c162
 
 
d5bcc1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f92c162
 
d5bcc1a
dcbae16
9e63d68
ce743f5
f92c162
 
 
 
 
 
9dcb09a
 
 
dcbae16
9dcb09a
 
 
 
ce743f5
f92c162
22231ac
f92c162
 
 
ce743f5
 
f92c162
ce743f5
cf4f8fb
ce743f5
 
 
 
 
 
 
f92c162
 
ce743f5
 
 
 
 
 
f92c162
ce743f5
 
 
 
 
 
 
 
 
 
 
 
f92c162
ce743f5
f92c162
ce743f5
 
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
 
 
 
 
 
 
 
f92c162
ce743f5
dcbae16
ce743f5
f92c162
 
 
d5bcc1a
f92c162
 
 
ae6a57b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import spaces
import random
import torch
from huggingface_hub import snapshot_download
from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
from kolors.pipelines import pipeline_stable_diffusion_xl_chatglm_256_ipadapter, pipeline_stable_diffusion_xl_chatglm_256
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from kolors.models.unet_2d_condition import UNet2DConditionModel
from diffusers import AutoencoderKL, EulerDiscreteScheduler
import gradio as gr
import numpy as np

import os


def resolve_ckpt_dir(env_var, default_dir):
    """Return the checkpoint directory to use for one set of weights.

    Args:
        env_var: Name of the environment variable that may override the path.
        default_dir: Path used when the variable is unset or empty.

    Returns:
        The value of ``env_var`` when it is set to a non-empty string,
        otherwise ``default_dir``.
    """
    # `or` (rather than a .get() default) so an empty variable also falls back.
    return os.environ.get(env_var) or default_dir


device = "cuda"

# Checkpoint roots. The defaults are the original hard-coded local paths; set
# KOLORS_CKPT_DIR / KOLORS_IPA_CKPT_DIR to run from a different location
# without editing this file.
# The original script also noted a Hub-based alternative: directories returned
# by snapshot_download(repo_id="Kwai-Kolors/Kolors") and
# snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-Plus").
ckpt_dir = resolve_ckpt_dir("KOLORS_CKPT_DIR", '/home/lixiang46/Kolors/weights/Kolors')
ckpt_IPA_dir = resolve_ckpt_dir("KOLORS_IPA_CKPT_DIR", '/home/lixiang46/Kolors/weights/Kolors-IP-Adapter-Plus')

# Instantiate every model component once, at import time.
# The text encoder, VAE, UNet and image encoder are cast to float16 and moved
# to `device`; the tokenizer and scheduler are loaded as-is.
text_encoder = (
    ChatGLMModel
    .from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16)
    .half()
    .to(device)
)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = (
    AutoencoderKL
    .from_pretrained(f"{ckpt_dir}/vae", revision=None)
    .half()
    .to(device)
)
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = (
    UNet2DConditionModel
    .from_pretrained(f"{ckpt_dir}/unet", revision=None)
    .half()
    .to(device)
)
# The image encoder ships with the IP-Adapter checkpoint, not the base one.
image_encoder = (
    CLIPVisionModelWithProjection
    .from_pretrained(f'{ckpt_IPA_dir}/image_encoder', ignore_mismatched_sizes=True)
    .to(dtype=torch.float16, device=device)
)

# Same value is used for both the resize and the crop of the reference image.
ip_img_size = 336
clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size)

# Text-to-image pipeline, built from the module-level vae / text_encoder /
# tokenizer / unet / scheduler objects.
pipe_t2i = pipeline_stable_diffusion_xl_chatglm_256.StableDiffusionXLPipeline(
    vae=vae,text_encoder=text_encoder, 
    tokenizer=tokenizer, 
    unet=unet, 
    scheduler=scheduler, 
    force_zeros_for_empty_prompt=False
).to(device)

# Image-prompted pipeline. It receives the very same component objects as
# pipe_t2i (no copies are made here), plus the CLIP image encoder and its
# image processor.
pipe_i2i = pipeline_stable_diffusion_xl_chatglm_256_ipadapter.StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    image_encoder=image_encoder,
    feature_extractor=clip_image_processor,
    force_zeros_for_empty_prompt=False
).to(device)

# Keep a reference to the UNet's current `encoder_hid_proj` under the name
# `text_encoder_hid_proj` before the IP-Adapter is loaded.
# NOTE(review): presumably load_ip_adapter replaces `encoder_hid_proj` and the
# Kolors UNet reads `text_encoder_hid_proj` afterwards - confirm against the
# kolors pipeline/UNet sources. This must stay ahead of load_ip_adapter.
if hasattr(pipe_i2i.unet, 'encoder_hid_proj'):
    pipe_i2i.unet.text_encoder_hid_proj = pipe_i2i.unet.encoder_hid_proj
    
# Load the IP-Adapter weights file from the root of the IP-Adapter checkpoint
# directory (empty subfolder).
pipe_i2i.load_ip_adapter( f'{ckpt_IPA_dir}' , subfolder="", weight_name=["ip_adapter_plus_general.bin"])

# Upper bound of the width and height sliders, in pixels.
MAX_IMAGE_SIZE = 2048
# Largest seed offered by the seed slider and drawn when randomizing
# (the maximum of a signed 32-bit integer).
MAX_SEED = np.iinfo(np.int32).max

def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, ip_adapter_image = None, ip_adapter_scale = None):
    """Generate a single image, optionally conditioned on a reference image.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative text prompt forwarded to the pipeline.
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is truthy.
        randomize_seed: When truthy, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width forwarded to the pipeline.
        height: Output height forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of denoising steps forwarded to the
            pipeline.
        ip_adapter_image: Reference image. ``None`` selects the plain
            text-to-image pipeline; anything else selects the IP-Adapter
            pipeline.
        ip_adapter_scale: Scale handed to ``set_ip_adapter_scale`` (wrapped in
            a one-element list). Only used when ``ip_adapter_image`` is given.

    Returns:
        The first entry of the pipeline output's ``images``.
    """
    chosen_seed = random.randint(0, MAX_SEED) if randomize_seed else seed
    rng = torch.Generator().manual_seed(chosen_seed)

    # Arguments that both pipelines receive unchanged.
    shared_kwargs = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=rng,
    )

    # No reference image: plain text-to-image.
    if ip_adapter_image is None:
        return pipe_t2i(**shared_kwargs).images[0]

    # Reference image supplied: set its influence, then run the IP-Adapter
    # pipeline for exactly one image.
    pipe_i2i.set_ip_adapter_scale([ip_adapter_scale])
    output = pipe_i2i(
        ip_adapter_image=[ip_adapter_image],
        num_images_per_prompt=1,
        **shared_kwargs,
    )
    return output.images[0]

# Example rows for gr.Examples. Each row must follow the order of the `inputs`
# list it is bound to: [prompt, ip_adapter_image, ip_adapter_scale].
examples = [
    # Text-only example: no reference image. The original row had the prompt
    # text in the image column and None in the prompt column. The scale uses
    # the slider's default (0.5) rather than None so the slider keeps a valid
    # value; infer() ignores the scale when no image is supplied.
    ["一张瓢虫的照片,微距,变焦,高质量,电影,拿着一个牌子,写着“可图”", None, 0.5],
    # Image-prompted examples: prompt, reference image path, influence scale.
    ["穿着黑色T恤衫,上面中文绿色大字写着“可图”", "image/test_ip.jpg", 0.5],
    ["一只可爱的小狗在奔跑", "image/test_ip2.png", 0.5],
]

# Label shown in the page header; it reports CUDA availability only and does
# not influence which device the models are placed on.
power_device = "GPU" if torch.cuda.is_available() else "CPU"

# Page stylesheet: centers the element with id "col-container" and caps its
# width.
css="""
#col-container {
    margin: 0 auto;
    max-width: 650px;
}
"""

# Build the Gradio page. Components appear in the order they are created
# inside the nested layout context managers, so statement order here is the
# page layout.
with gr.Blocks(css=css) as demo:
    # Header row: title plus the CUDA-availability label.
    with gr.Row():
        gr.Markdown(f"""
        # Kolors-IP-Adapter-Plus
        Currently running on {power_device}.
        """)
        
    with gr.Row():
        # Left column: all inputs.
        with gr.Column(elem_id="col-container"):
            with gr.Row():
                # Optional reference image, delivered to infer() as a PIL image.
                ip_adapter_image = gr.Image(label="IP-Adapter Image", type="pil")
            with gr.Row():
                # Influence of the reference image, passed to infer() as
                # ip_adapter_scale.
                ip_adapter_scale = gr.Slider(
                    label="Image influence scale",
                    info="Use 1 for creating variations",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.5,
                )
            with gr.Row():
                # Single-line prompt box with the Run button beside it.
                prompt = gr.Text(
                    label="Prompt",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your prompt",
                    container=False,
                )
                run_button = gr.Button("Run", scale=0)
            # Less frequently changed generation parameters, collapsed by default.
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Text(
                    label="Negative prompt",
                    max_lines=1,
                    placeholder="Enter a negative prompt",
                    visible=True,
                )
                # Only used by infer() when "Randomize seed" is unchecked.
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                with gr.Row():
                    # Output resolution, adjustable in steps of 32.
                    width = gr.Slider(
                        label="Width",
                        minimum=256,
                        maximum=MAX_IMAGE_SIZE,
                        step=32,
                        value=1024,
                    )
                    height = gr.Slider(
                        label="Height",
                        minimum=256,
                        maximum=MAX_IMAGE_SIZE,
                        step=32,
                        value=1024,
                    )
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=0.0,
                        maximum=10.0,
                        step=0.1,
                        value=5.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=10,
                        maximum=50,
                        step=1,
                        value=25,
                    )
            
        # Right column: the generated image.
        # NOTE(review): this column reuses elem_id "col-container" from the
        # input column; element ids are normally unique - confirm intended.
        with gr.Column(elem_id="col-container"):
            result = gr.Image(label="Result", show_label=False)
    
    # Clickable examples; each row of `examples` supplies values for
    # [prompt, ip_adapter_image, ip_adapter_scale], in that order.
    with gr.Row():
        gr.Examples(
                examples = examples,
                inputs = [prompt, ip_adapter_image, ip_adapter_scale]
            )

    # Run button -> infer(). The inputs list is in the same order as infer's
    # positional parameters; the returned image is shown in `result`.
    run_button.click(
        fn = infer,
        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, ip_adapter_image, ip_adapter_scale],
        outputs = [result]
    )

# Enable the request queue and start the server when the module is executed.
# share=True asks Gradio to also create a public share link.
demo.queue().launch(share=True)