import os

download_repo_loc = "./models/image_encoder/"
os.system("pip install -U peft")  # runtime dependency install for the hosted Space
# os.system(f"wget -O {download_repo_loc}config.json https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/config.json?download=true")
# os.system(f"wget -O {download_repo_loc}model.safetensors https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors?download=true")
# os.system(f"wget -O {download_repo_loc}pytorch_model.bin https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/pytorch_model.bin?download=true")
import spaces
import gradio as gr
import torch
import numpy as np
import cv2
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, StableDiffusionXLPipeline
from PIL import Image
from ip_adapter import IPAdapterXL

base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
device = "cuda"
image_encoder_path = download_repo_loc  # "sdxl_models/image_encoder"
ip_ckpt = "./models/ip-adapter_sdxl.bin"  # IP-Adapter SDXL checkpoint, expected under ./models/
# load SDXL pipeline
pipe = StableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    add_watermarker=False,
)
# load SDXL + ControlNet (Canny) pipeline
controlnet_path = "diffusers/controlnet-canny-sdxl-1.0"
controlnet = ControlNetModel.from_pretrained(controlnet_path, use_safetensors=False, torch_dtype=torch.float16).to(device)
controlnet_pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    base_model_path,
    controlnet=controlnet,
    torch_dtype=torch.float16,
    add_watermarker=False,
)
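# Two pipelines back the two tabs below: `pipe` drives plain IP-Adapter style
# transfer, while `controlnet_pipe` additionally conditions generation on a
# Canny edge map of a source image. On a ZeroGPU Space, the @spaces.GPU
# decorator attaches a GPU for the duration of each decorated call.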
@spaces.GPU(enable_queue=True)
def create_image_controlnet(image_pil, input_image, prompt, n_prompt, scale, control_scale, guidance_scale, num_samples, num_inference_steps, seed):
    # load ip-adapter onto the ControlNet pipeline with style+layout blocks
    ip_model = IPAdapterXL(controlnet_pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"])
    image_pil = image_pil.resize((512, 512))
    # build a Canny edge map of the source image to condition ControlNet
    cv_input_image = pil_to_cv2(input_image)
    detected_map = cv2.Canny(cv_input_image, 50, 200)
    # Canny returns a single-channel map, so convert GRAY->RGB for PIL
    canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_GRAY2RGB))
    images = ip_model.generate(pil_image=image_pil,
                               prompt=prompt,
                               negative_prompt=n_prompt,
                               scale=scale,
                               guidance_scale=guidance_scale,
                               num_samples=int(num_samples),  # sliders deliver floats
                               num_inference_steps=int(num_inference_steps),
                               seed=int(seed),
                               image=canny_map,
                               controlnet_conditioning_scale=control_scale,
                               )
    del ip_model
    return images
def pil_to_cv2(image_pil):
    # PIL images are RGB; OpenCV expects BGR channel order
    image_np = np.array(image_pil)
    image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    return image_cv2
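# cv2.Canny consumes the 8-bit BGR array returned above and emits a
# single-channel edge map, hence the GRAY2RGB conversion in
# create_image_controlnet before wrapping it back into a PIL image.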
# generate image variations with only an image prompt
@spaces.GPU(enable_queue=True)
def create_image(image_pil, target, prompt, n_prompt, scale, guidance_scale, num_samples, num_inference_steps, seed):
    # load ip-adapter with the attention blocks matching the selected mode
    if target == "Load original IP-Adapter":
        # target_blocks=["blocks"] for the original IP-Adapter
        ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["blocks"])
    elif target == "Load only style blocks":
        # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
        ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1"])
    else:
        # "Load style+layout block" (also the fallback when nothing is selected)
        ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"])
    image_pil = image_pil.resize((512, 512))
    images = ip_model.generate(pil_image=image_pil,
                               prompt=prompt,
                               negative_prompt=n_prompt,
                               scale=scale,
                               guidance_scale=guidance_scale,
                               num_samples=int(num_samples),  # sliders deliver floats
                               num_inference_steps=int(num_inference_steps),
                               seed=int(seed),
                               # neg_content_prompt="a rabbit",
                               # neg_content_scale=0.5,
                               )
    # images[0].save("result.png")
    del ip_model
    return images
DESCRIPTION = """
# InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation
**Demo by [ameer azam] - [Twitter](https://twitter.com/Ameerazam18) - [GitHub](https://github.com/AMEERAZAM08)) - [Hugging Face](https://huggingface.co/ameerazam08)**
This is a demo of https://github.com/InstantStyle/InstantStyle.
"""
block = gr.Blocks(css="footer {visibility: hidden}").queue(max_size=10)
with block:
    with gr.Tab("Instant Style"):
        with gr.Row():
            with gr.Column():
                gr.Markdown(DESCRIPTION)
        with gr.Row():
            with gr.Column():
                image_pil = gr.Image(label="Style Image", type='pil')
                target = gr.Dropdown(["Load original IP-Adapter", "Load only style blocks", "Load style+layout block"], label="Load Style", info="IP-Adapter Layers")
                prompt = gr.Textbox(label="Prompt", value="a cat, masterpiece, best quality, high quality")
                n_prompt = gr.Textbox(label="Neg Prompt", value="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry")
                scale = gr.Slider(minimum=0, maximum=2.0, step=0.01, value=1.0, label="scale")
                guidance_scale = gr.Slider(minimum=1, maximum=15.0, step=0.01, value=5.0, label="guidance scale")
                num_samples = gr.Slider(minimum=1, maximum=3, step=1, value=1, label="num samples")
                num_inference_steps = gr.Slider(minimum=5, maximum=50, step=1, value=30, label="num inference steps")
                seed = gr.Slider(minimum=-1000000, maximum=1000000, value=1, step=1, label="Seed Value")
                generate_button = gr.Button("Generate Image")
            with gr.Column():
                # create_image returns a list of PIL images, rendered by the Gallery
                generated_image = gr.Gallery(label="Generated Image")
        generate_button.click(fn=create_image,
                              inputs=[image_pil, target, prompt, n_prompt, scale, guidance_scale, num_samples, num_inference_steps, seed],
                              outputs=[generated_image])
with gr.Tab("Image stylization Style"):
with gr.Row():
with gr.Column():
gr.Markdown("""
# Imagestylization-Style: Free Lunch towards Style-Preserving in Text-to-Image Generation
**Demo by [ameer azam] - [Twitter](https://twitter.com/Ameerazam18) - [GitHub](https://github.com/AMEERAZAM08)) - [Hugging Face](https://huggingface.co/ameerazam08)**
This is a demo of https://github.com/InstantStyle/InstantStyle.
""")
with gr.Row():
with gr.Column():
src_image_pil = gr.Image(label="Source Image", type='pil')
with gr.Column():
image_pil = gr.Image(label="Style Image", type='pil')
prompt = gr.Textbox(label="Prompt",value="masterpiece, best quality, high quality")
n_prompt = gr.Textbox(label="Neg Prompt",value="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry")
scale = gr.Slider(minimum=0,maximum=2.0, step=0.01,value=1.0, label="scale")
control_scale = gr.Slider(minimum=0,maximum=1.0, step=0.01,value=0.6, label="controlnet conditioning scale")
guidance_scale = gr.Slider(minimum=1,maximum=15.0, step=0.01,value=5.0, label="guidance scale")
num_samples= gr.Slider(minimum=1,maximum=4.0, step=1.0,value=1.0, label="num samples")
num_inference_steps = gr.Slider(minimum=5,maximum=50.0, step=1.0,value=30, label="num inference steps")
seed = gr.Slider(minimum=-1000000,maximum=1000000,value=1, step=1, label="Seed Value")
generate_button = gr.Button("Generate Image")
with gr.Column():
generated_image = gr.Gallery(label="Generated Image")
generate_button.click(fn=create_image_controlnet,
inputs=[image_pil,src_image_pil,prompt,n_prompt,scale, control_scale, guidance_scale,num_samples,num_inference_steps,seed],
outputs=[generated_image])
block.launch()
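# Standard Gradio launch options apply above if needed, e.g.
# block.launch(server_name="0.0.0.0") to bind all interfaces, or
# block.launch(share=True) for a temporary public link.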