|
--- |
|
license: apache-2.0 |
|
pipeline_tag: text-to-image |
|
--- |
|
|
|
# ***ControlNet Depth SDXL, supports Zoe and MiDaS*** |
|
 |
|
|
|
# Example |
|
 |
|
|
|
 |
|
|
|
 |
|
|
|
 |
|
|
|
 |
|
|
|
 |
|
|
|
 |
|
|
|
 |
|
|
|
 |
|
|
|
 |
|
|
|
# How to use it |
|
```python |
|
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL |
|
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler |
|
from PIL import Image |
|
import torch |
|
import random |
|
import numpy as np |
|
import cv2 |
|
|
|
|
|
from controlnet_aux import MidasDetector, ZoeDetector |
|
|
|
|
|
# Depth annotators. Either one can produce the conditioning map fed to the
# ControlNet below; the example picks between them at random further down.
processor_zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
processor_midas = MidasDetector.from_pretrained("lllyasviel/Annotators")

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    subfolder="scheduler",
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-depth-sdxl-1.0",
    torch_dtype=torch.float16,
)

# When testing with another base model, change the VAE to match it as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

# Every component above is loaded in float16, so run on the GPU when one is
# available instead of leaving the pipeline on the CPU.
if torch.cuda.is_available():
    pipe.to("cuda")

# For best results the conditioning image should cover roughly 1024 * 1024
# pixels (or the same bucket resolution); the resize below takes care of that.
image_path = "your original image path"
img = cv2.imread(image_path)
if img is None:
    # cv2.imread does not raise on a missing or unreadable file; fail here
    # with a clear message rather than deep inside the annotator.
    raise FileNotFoundError(f"could not read image: {image_path}")

# OpenCV decodes to BGR; convert to RGB, the channel order the annotators
# receive when they are given a PIL image.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Pick one of the two supported depth estimators at random.
if random.random() > 0.5:
    controlnet_img = processor_zoe(img, output_type='cv2')
else:
    controlnet_img = processor_midas(img, output_type='cv2')

# Rescale to ~1024 * 1024 total pixels while keeping the aspect ratio.
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
# Snap both sides down to a multiple of 8 (the SDXL VAE downsampling factor)
# so the control image and the requested output size agree exactly.
new_width = max(8, int(width * ratio) // 8 * 8)
new_height = max(8, int(height * ratio) // 8 * 8)
controlnet_img = cv2.resize(controlnet_img, (new_width, new_height))
controlnet_img = Image.fromarray(controlnet_img)

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images

# Plain string: the original f-string had no placeholders.
images[0].save("your image save path, png format is usually better than jpg or webp in terms of image quality but got much bigger")
|
``` |