|
import gradio as gr
import numpy as np
import torch
from diffusers import (
    ControlNetModel,
    DPMSolverSDEScheduler,
    StableDiffusionXLControlNetInpaintPipeline,
    StableDiffusionXLControlNetPipeline,
    StableDiffusionXLImg2ImgPipeline,
)
from diffusers.image_processor import IPAdapterMaskProcessor
from diffusers.utils import load_image

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
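# The checkpoints below are loaded with this dtype and moved to `device`; the fp32
# fallback keeps the script importable without a GPU, though SDXL inference on CPU
# is impractically slow.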
|
|
|
processor_mask = IPAdapterMaskProcessor()

controlnets = [
    ControlNetModel.from_pretrained(
        "diffusers/controlnet-depth-sdxl-1.0",
        variant="fp16",
        use_safetensors=True,
        torch_dtype=torch_dtype,
    ),
    ControlNetModel.from_pretrained(
        "diffusers/controlnet-canny-sdxl-1.0",
        variant="fp16",
        use_safetensors=True,
        torch_dtype=torch_dtype,
    ),
]
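# controlnets[0] (depth) also drives the base text-to-image pass below; the full
# depth + canny pair is reserved for the inpainting pipeline.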
|
|
|
# Base text-to-image pipeline. Note: the depth ControlNet is attached twice, once for
# the full depth map and once for the masked depth image built by make_inpaint_condition().
pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained(
    "SG161222/RealVisXL_V5.0",
    controlnet=[controlnets[0], controlnets[0]],
    torch_dtype=torch_dtype,
    use_safetensors=True,
    variant="fp16",
)
pipe_CN.scheduler = DPMSolverSDEScheduler.from_pretrained(
    "SG161222/RealVisXL_V5.0", subfolder="scheduler", use_karras_sigmas=True
)
pipe_CN.to(device)

# Load the DreamBooth LoRA into the UNet and both text encoders under named adapters.
state_dict, network_alphas = StableDiffusionXLControlNetPipeline.lora_state_dict(
    "CreativesCombined/hb8_cases_dreambooth_lora_test_1_14",
    weight_name="pytorch_lora_weights.safetensors",
)
pipe_CN.load_lora_into_unet(state_dict, network_alphas, pipe_CN.unet, adapter_name="unet_cases")
pipe_CN.load_lora_into_text_encoder(
    state_dict, network_alphas, pipe_CN.text_encoder, adapter_name="text_cases"
)
# The second text encoder's LoRA keys live under the "text_encoder_2" prefix.
pipe_CN.load_lora_into_text_encoder(
    state_dict,
    network_alphas,
    pipe_CN.text_encoder_2,
    prefix="text_encoder_2",
    adapter_name="text_2_cases",
)
pipe_CN.set_adapters(
    ["unet_cases", "text_cases", "text_2_cases"], adapter_weights=[1.0, 0.5, 0.5]
)
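# If VRAM is tight, pipe_CN.enable_model_cpu_offload() could replace the .to(device)
# call above (and likewise for the other pipelines); kept explicit here for speed.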
|
|
|
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_CN.text_encoder_2,
    vae=pipe_CN.vae,
    torch_dtype=torch_dtype,
    use_safetensors=True,
    variant="fp16",
)
refiner.to(device)

pipe_IN = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    controlnet=controlnets,
    torch_dtype=torch_dtype,
    variant="fp16",
)
pipe_IN.load_lora_weights(
    "Tonioesparza/ourhood_training_dreambooth_lora_2_0",
    weight_name="pytorch_lora_weights.safetensors",
    adapter_name="ourhood",
)
pipe_IN.to(device)
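# All three pipelines are now resident on the device; the refiner shares
# text_encoder_2 and the VAE with pipe_CN, so those weights are loaded only once.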
|
|
|
def make_inpaint_condition(image, image_mask):
    """Build a ControlNet inpainting condition: masked pixels are set to -1.0."""
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
    assert image.shape[:2] == image_mask.shape[:2], "image and mask must be the same size"
    image[image_mask > 0.5] = -1.0  # flag masked pixels
    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)  # HWC -> NCHW
    return torch.from_numpy(image)
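# For a 1024x1024 PIL input this returns a (1, 3, 1024, 1024) float32 tensor with
# masked pixels at -1.0 and all remaining values in [0, 1].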
|
|
|
def ourhood_inference(prompt: str, num_inference_steps: int, scaffold: int, seed: int):
    # Conditioning assets (inpainting mask, depth map, canny edges) for each of the
    # three perspective presets.
    repo = "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/"
    scaff_dic = {
        1: {
            "mask1": repo + "mask_in_square_2.png",
            "depth_image": repo + "mask_depth_noroof_square.png",
            "canny_image": repo + "mask_depth_solo_square.png",
        },
        2: {
            "mask1": repo + "mask_in_C.png",
            "depth_image": repo + "depth_C.png",
            "canny_image": repo + "canny_C_solo.png",
        },
        3: {
            "mask1": repo + "mask_in_B.png",
            "depth_image": repo + "depth_B.png",
            "canny_image": repo + "canny_B_solo.png",
        },
    }

    output_height = 1024
    output_width = 1024

    mask1 = load_image(scaff_dic[scaffold]["mask1"])

    # Mask in the layout IPAdapterMaskProcessor produces for attention masking
    # (prepared here but not consumed by the pipeline calls below).
    masks = processor_mask.preprocess([mask1], height=output_height, width=output_width)
    masks = [masks.reshape(1, masks.shape[0], masks.shape[2], masks.shape[3])]

    n_steps = num_inference_steps

    depth_image = load_image(scaff_dic[scaffold]["depth_image"])
    canny_image = load_image(scaff_dic[scaffold]["canny_image"])

    # Depth map with the masked region set to -1, fed to the second depth ControlNet.
    masked_depth = make_inpaint_condition(depth_image, mask1)

    images_CN = [depth_image, canny_image]

    prompt1 = (
        "A front-page still-life photograph, an 8-foot wooden crate, "
        + prompt
        + " in the style of hb8 interior architecture"
    )
    neg1 = "text, watermark"
    prompt2 = (
        "Photorealistic rendering of an OurHood privacy booth, with a silken oak frame, "
        "hickory-stained melange polyester fabric, windows"
    )
    neg2 = "curtains, pillows"

    generator = torch.manual_seed(seed)

    # Stage 1: base generation under depth guidance, stopped at 90% of the schedule
    # and returned as latents for the refiner.
    results = pipe_CN(
        prompt=prompt1,
        negative_prompt=neg1,
        num_inference_steps=n_steps,
        num_images_per_prompt=1,
        generator=generator,
        denoising_end=0.9,
        image=[depth_image, masked_depth],
        output_type="latent",
        control_guidance_start=[0.0, 0.5],
        control_guidance_end=[0.5, 1.0],
        controlnet_conditioning_scale=[0.5, 1.0],
    ).images[0]

    # Stage 2: the SDXL refiner finishes the last 10% of the denoising schedule.
    image = refiner(
        prompt=prompt1,
        num_inference_steps=n_steps,
        denoising_start=0.9,
        image=results,
    ).images[0]

    # Stage 3: inpaint the booth into the masked region, guided by depth + canny.
    image = pipe_IN(
        prompt=prompt2,
        negative_prompt=neg2,
        image=image,
        mask_image=mask1,
        num_inference_steps=65,
        strength=1.0,
        control_guidance_end=[0.9, 0.9],
        controlnet_conditioning_scale=[0.35, 0.65],
        control_image=images_CN,
        generator=generator,
    ).images[0]

    return image
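# Hypothetical direct invocation, bypassing the Gradio UI (arguments: prompt,
# num_inference_steps, scaffold/perspective, seed):
#
#     img = ourhood_inference("in a Nordic atrium environment", 35, 1, 42)
#     img.save("ourhood_test.png")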
|
|
|
|
|
|
|
|
|
""" |
|
image = refiner( |
|
prompt=prompt, |
|
num_inference_steps=40, |
|
denoising_start=0.8, |
|
image=image, |
|
).images[0] |
|
""" |
|
|
|
|
|
|
|
examples = [
    "in a British museum, pavilion, masonry, high tables and chairs",
    "in a high-ceilinged atrium, glass front, plant walls, concrete floor, furniture, golden hour",
    "in a colorful open office environment",
    "in a Nordic atrium environment",
]
|
|
|
css=""" |
|
#col-container { |
|
margin: 0 auto; |
|
max-width: 640px; |
|
} |
|
""" |
|
|
|
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# HB8-Ourhood inference test")

        with gr.Row():
            prompt = gr.Text(
                label="Setting prompt",
                show_label=False,
                max_lines=1,
                placeholder="Where do you want to show the Ourhood pod?",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            perspective = gr.Slider(
                label="Perspective",
                minimum=1,
                maximum=3,
                step=1,
                value=1,
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            with gr.Row():
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=35,
                    maximum=50,
                    step=1,
                    value=35,
                )

        gr.Examples(examples=examples, inputs=[prompt])

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=ourhood_inference,
        inputs=[prompt, num_inference_steps, perspective, seed],
        outputs=[result],
    )
|
|
|
demo.queue().launch()
|
|
|
|