import os donwload_repo_loc= "./models/image_encoder/" os.system("pip install -U peft") # os.system(f"wget -O {donwload_repo_loc}config.json https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/config.json?download=true") # os.system(f"wget -O {donwload_repo_loc}model.safetensors https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors?download=true") # os.system(f"wget -O {donwload_repo_loc}pytorch_model.bin https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/pytorch_model.bin?download=true") import spaces import gradio as gr import torch import numpy as np import cv2 from diffusers import StableDiffusionXLPipeline from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline from PIL import Image from ip_adapter import IPAdapterXL base_model_path = "stabilityai/stable-diffusion-xl-base-1.0" device = "cuda" image_encoder_path = donwload_repo_loc #"sdxl_models/image_encoder" ip_ckpt = "./models/ip-adapter_sdxl.bin" # load SDXL pipeline pipe = StableDiffusionXLPipeline.from_pretrained( base_model_path, torch_dtype=torch.float16, add_watermarker=False, ) controlnet_path = "diffusers/controlnet-canny-sdxl-1.0" controlnet = ControlNetModel.from_pretrained(controlnet_path, use_safetensors=False, torch_dtype=torch.float16).to(device) contronet_pipe = StableDiffusionXLControlNetPipeline.from_pretrained( base_model_path, controlnet=controlnet, torch_dtype=torch.float16, add_watermarker=False, ) @spaces.GPU(enable_queue=True) def create_image_controlnet(image_pil,input_image,target,prompt,n_prompt,scale, control_scale, guidance_scale,num_samples,num_inference_steps,seed): # load ip-adapter ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"]) image_pil=image_pil.resize((512, 512)) cv_input_image = pil_to_cv2(input_image) detected_map = cv2.Canny(cv_input_image, 50, 200) canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB)) images = ip_model.generate(pil_image=image_pil, prompt=prompt, negative_prompt=n_prompt, scale=scale, guidance_scale=guidance_scale, num_samples=num_samples, num_inference_steps=num_inference_steps, seed=seed, image=canny_map, controlnet_conditioning_scale=control_scale, ) del ip_model return images def pil_to_cv2(image_pil): image_np = np.array(image_pil) image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR) return image_cv2 # generate image variations with only image prompt @spaces.GPU(enable_queue=True) def create_image(image_pil,target,prompt,n_prompt,scale, guidance_scale,num_samples,num_inference_steps,seed): # load ip-adapter if target =="Load original IP-Adapter": # target_blocks=["blocks"] for original IP-Adapter ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["blocks"]) elif target=="Load only style blocks": # target_blocks=["up_blocks.0.attentions.1"] for style blocks only ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1"]) elif target == "Load style+layout block": # target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"]) image_pil=image_pil.resize((512, 512)) images = ip_model.generate(pil_image=image_pil, prompt=prompt, negative_prompt=n_prompt, scale=scale, guidance_scale=guidance_scale, num_samples=num_samples, num_inference_steps=num_inference_steps, seed=seed, #neg_content_prompt="a rabbit", #neg_content_scale=0.5, ) # images[0].save("result.png") del ip_model return images DESCRIPTION = """ # InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation **Demo by [ameer azam] - [Twitter](https://twitter.com/Ameerazam18) - [GitHub](https://github.com/AMEERAZAM08)) - [Hugging Face](https://huggingface.co/ameerazam08)** This is a demo of https://github.com/InstantStyle/InstantStyle. """ block = gr.Blocks(css="footer {visibility: hidden}").queue(max_size=10) with block: with gr.Tab("Instant Syle"): with gr.Row(): with gr.Column(): # gr.Markdown("##