Spaces:

ucaslx
/

Kolors-IP-Adapter-Plus

Runtime error

File size: 7,334 Bytes

52b67df
f92c162
 
52b67df
 
d5bcc1a
52b67df
 
6ef1dc4
 
52b67df
 
f92c162
ae6a57b
 
 
 
 
f92c162
52b67df
 
 
 
 
 
 
 
 
 
d5bcc1a
 
 
 
 
 
 
 
 
52b67df
 
 
 
 
 
 
 
 
 
d5bcc1a
 
52b67df
d5bcc1a
f92c162
 
52b67df
f92c162
d5bcc1a
f92c162
 
 
d5bcc1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f92c162
 
d5bcc1a
dcbae16
9e63d68
ce743f5
f92c162
 
 
 
 
 
9dcb09a
 
 
dcbae16
9dcb09a
 
 
 
ce743f5
f92c162
22231ac
f92c162
 
 
ce743f5
 
f92c162
ce743f5
cf4f8fb
ce743f5
 
 
 
 
 
 
f92c162
 
ce743f5
 
 
 
 
 
f92c162
ce743f5
 
 
 
 
 
 
 
 
 
 
 
f92c162
ce743f5
f92c162
ce743f5
 
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
 
 
 
 
 
 
 
f92c162
ce743f5
dcbae16
ce743f5
f92c162
 
 
d5bcc1a
f92c162
 
 
ae6a57b

import os
import random

# NOTE: `spaces` stays ahead of `torch`, matching the original import order.
import spaces
import torch
from huggingface_hub import snapshot_download
from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
from diffusers import AutoencoderKL, EulerDiscreteScheduler
import gradio as gr
import numpy as np

from kolors.pipelines import pipeline_stable_diffusion_xl_chatglm_256_ipadapter, pipeline_stable_diffusion_xl_chatglm_256
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from kolors.models.unet_2d_condition import UNet2DConditionModel

device = "cuda"

# Checkpoint directories on the original author's machine. They are used
# as-is when present; on any other host the weights are downloaded from the
# Hugging Face Hub instead of failing on a missing path.
_LOCAL_CKPT_DIR = '/home/lixiang46/Kolors/weights/Kolors'
_LOCAL_CKPT_IPA_DIR = '/home/lixiang46/Kolors/weights/Kolors-IP-Adapter-Plus'


def _resolve_weights_dir(local_dir, repo_id):
    """Return ``local_dir`` if it exists, else a Hub snapshot of ``repo_id``.

    Args:
        local_dir: Filesystem path that may already hold the weights.
        repo_id: Hugging Face Hub repository to download when ``local_dir``
            is not an existing directory.

    Returns:
        Path (str) of the directory containing the weights.
    """
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)


ckpt_dir = _resolve_weights_dir(_LOCAL_CKPT_DIR, "Kwai-Kolors/Kolors")
ckpt_IPA_dir = _resolve_weights_dir(_LOCAL_CKPT_IPA_DIR, "Kwai-Kolors/Kolors-IP-Adapter-Plus")

# Load models
# Each network is loaded, cast to fp16 and moved to `device`; the tokenizer
# and scheduler are loaded as-is.
text_encoder = ChatGLMModel.from_pretrained(
    f'{ckpt_dir}/text_encoder',
    torch_dtype=torch.float16,
)
text_encoder = text_encoder.half().to(device)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')

vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None)
vae = vae.half().to(device)

scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")

unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None)
unet = unet.half().to(device)

image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    f'{ckpt_IPA_dir}/image_encoder',
    ignore_mismatched_sizes=True,
)
image_encoder = image_encoder.to(dtype=torch.float16, device=device)

# The reference image is resized and cropped to this square edge length.
ip_img_size = 336
clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size)

# Both pipelines are built from the very same component objects.
_shared_components = dict(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    force_zeros_for_empty_prompt=False,
)

# Text-to-image pipeline.
pipe_t2i = pipeline_stable_diffusion_xl_chatglm_256.StableDiffusionXLPipeline(
    **_shared_components
).to(device)

# Image-prompted pipeline: same components plus the CLIP image encoder and
# its preprocessor.
pipe_i2i = pipeline_stable_diffusion_xl_chatglm_256_ipadapter.StableDiffusionXLPipeline(
    image_encoder=image_encoder,
    feature_extractor=clip_image_processor,
    **_shared_components
).to(device)

# Expose the existing projection under a second attribute name before the
# IP-Adapter weights are loaded.
# NOTE(review): presumably load_ip_adapter replaces `encoder_hid_proj` - confirm.
_i2i_unet = pipe_i2i.unet
if hasattr(_i2i_unet, 'encoder_hid_proj'):
    _i2i_unet.text_encoder_hid_proj = _i2i_unet.encoder_hid_proj

pipe_i2i.load_ip_adapter(
    f'{ckpt_IPA_dir}',
    subfolder="",
    weight_name=["ip_adapter_plus_general.bin"],
)

# Largest seed offered by the UI and drawn by `random.randint`: the maximum
# value of a signed 32-bit integer.
_INT32_LIMITS = np.iinfo(np.int32)
MAX_SEED = _INT32_LIMITS.max

# Upper bound (in pixels) of the width and height sliders.
MAX_IMAGE_SIZE = 2048

def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, ip_adapter_image = None, ip_adapter_scale = None):
    """Generate one image from a prompt, optionally guided by a reference image.

    Without ``ip_adapter_image`` the request is served by ``pipe_t2i``; with
    one, the IP-Adapter scale of ``pipe_i2i`` is set and that pipeline is used.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative prompt passed to the pipeline.
        seed: Seed for the torch generator. Ignored when ``randomize_seed``
            is truthy. Coerced to ``int``.
        randomize_seed: When truthy, a seed is drawn uniformly from
            ``[0, MAX_SEED]`` instead of using ``seed``.
        width: Output width passed to the pipeline, coerced to ``int``.
        height: Output height passed to the pipeline, coerced to ``int``.
        guidance_scale: Passed through to the pipeline unchanged.
        num_inference_steps: Step count passed to the pipeline, coerced to
            ``int``.
        ip_adapter_image: Reference image, or ``None`` for text-only
            generation.
        ip_adapter_scale: IP-Adapter scale; ``None`` falls back to 0.5, the
            initial value of the UI slider.

    Returns:
        The first entry of the pipeline output's ``images``.
    """
    # NOTE(review): `spaces` is imported at file level but no `@spaces.GPU`
    # decorator is applied here - confirm whether this app targets ZeroGPU.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders and API clients may deliver whole numbers as floats, which
    # `manual_seed` rejects, so integral arguments are coerced explicitly.
    generator = torch.Generator().manual_seed(int(seed))

    # Arguments common to both pipelines.
    call_kwargs = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        width=int(width),
        height=int(height),
        generator=generator,
    )

    if ip_adapter_image is None:
        return pipe_t2i(**call_kwargs).images[0]

    # Guard against the signature default reaching the pipeline as `[None]`.
    if ip_adapter_scale is None:
        ip_adapter_scale = 0.5
    pipe_i2i.set_ip_adapter_scale([ip_adapter_scale])
    return pipe_i2i(
        ip_adapter_image=[ip_adapter_image],
        num_images_per_prompt=1,
        **call_kwargs,
    ).images[0]

# Example rows for gr.Examples. Column order must match the `inputs` list the
# examples are bound to: [prompt, ip_adapter_image, ip_adapter_scale].
examples = [
    # Text-only example: no reference image. The prompt previously sat in the
    # image column, so it would have been treated as an image path.
    ["一张瓢虫的照片，微距，变焦，高质量，电影，拿着一个牌子，写着“可图”", None, 0.5],
    ["穿着黑色T恤衫，上面中文绿色大字写着“可图”", "image/test_ip.jpg", 0.5],
    ["一只可爱的小狗在奔跑", "image/test_ip2.png", 0.5],
]

# Label shown in the page header: "GPU" when CUDA is available, else "CPU".
power_device = "GPU" if torch.cuda.is_available() else "CPU"

# Stylesheet handed to gr.Blocks: centers the element with id `col-container`
# and caps its width.
css = (
    "\n"
    "#col-container {\n"
    "    margin: 0 auto;\n"
    "    max-width: 650px;\n"
    "}\n"
)

# Build the Gradio page: a header, an input column, a result column, the
# example gallery, and the wiring from the Run button to `infer`.
with gr.Blocks(css=css) as demo:
    # Header with the detected device label.
    with gr.Row():
        gr.Markdown(f"""
        # Kolors-IP-Adapter-Plus
        Currently running on {power_device}.
        """)

    with gr.Row():
        # Input column.
        with gr.Column(elem_id="col-container"):
            with gr.Row():
                ip_adapter_image = gr.Image(type="pil", label="IP-Adapter Image")
            with gr.Row():
                ip_adapter_scale = gr.Slider(
                    minimum=0.0, maximum=1.0, step=0.05, value=0.5,
                    label="Image influence scale",
                    info="Use 1 for creating variations",
                )
            with gr.Row():
                prompt = gr.Text(
                    placeholder="Enter your prompt",
                    label="Prompt",
                    show_label=False,
                    container=False,
                    max_lines=1,
                )
                run_button = gr.Button("Run", scale=0)

            # Less frequently changed generation parameters, collapsed by default.
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Text(
                    placeholder="Enter a negative prompt",
                    label="Negative prompt",
                    visible=True,
                    max_lines=1,
                )
                seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
                randomize_seed = gr.Checkbox(value=True, label="Randomize seed")
                with gr.Row():
                    width = gr.Slider(
                        minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024,
                        label="Width",
                    )
                    height = gr.Slider(
                        minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024,
                        label="Height",
                    )
                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=0.0, maximum=10.0, step=0.1, value=5.0,
                        label="Guidance scale",
                    )
                    num_inference_steps = gr.Slider(
                        minimum=10, maximum=50, step=1, value=25,
                        label="Number of inference steps",
                    )

        # Output column.
        with gr.Column(elem_id="col-container"):
            result = gr.Image(show_label=False, label="Result")

    # Clickable examples that fill the prompt, reference image and scale.
    with gr.Row():
        gr.Examples(examples=examples, inputs=[prompt, ip_adapter_image, ip_adapter_scale])

    # Order matches the positional parameters of `infer`.
    run_inputs = [
        prompt,
        negative_prompt,
        seed,
        randomize_seed,
        width,
        height,
        guidance_scale,
        num_inference_steps,
        ip_adapter_image,
        ip_adapter_scale,
    ]
    run_button.click(fn=infer, inputs=run_inputs, outputs=[result])

# Enable request queuing, then start the server with a public share link requested.
demo.queue().launch(share=True)