# Source: Hugging Face Space "CreativesCombined/HB8_replica_workflow_pilot"
# (Space status when captured: Sleeping; file size: 9,303 bytes).

import gradio as gr
import numpy as np
from diffusers import StableDiffusionXLControlNetInpaintPipeline
from diffusers import StableDiffusionXLImg2ImgPipeline, DPMSolverMultistepScheduler, AutoencoderTiny, StableDiffusionXLControlNetPipeline, ControlNetModel
from diffusers.utils import load_image
from diffusers.image_processor import IPAdapterMaskProcessor
import torch
import os
from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
from diffusers.utils import make_image_grid
from diffusers import DPMSolverSDEScheduler


# Upper bound for the seed slider in the UI (largest signed 32-bit integer).
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

# Choose the device and precision once and use them for every model below, so
# the script no longer hard-codes "cuda"/float16 and can start on a CPU-only host.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

processor_mask = IPAdapterMaskProcessor()

# Index 0 is the depth ControlNet, index 1 the canny ControlNet.
controlnets = [
    ControlNetModel.from_pretrained(
        "diffusers/controlnet-depth-sdxl-1.0",
        variant="fp16",
        use_safetensors=True,
        torch_dtype=torch_dtype,
    ),
    ControlNetModel.from_pretrained(
        "diffusers/controlnet-canny-sdxl-1.0",
        torch_dtype=torch_dtype,
        use_safetensors=True,
        variant="fp16",
    ),
]

# Base text-to-image pipeline. It is given the depth ControlNet twice because
# ourhood_inference feeds it two depth-derived control images.
pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained(
    "SG161222/RealVisXL_V5.0",
    torch_dtype=torch_dtype,
    controlnet=[controlnets[0], controlnets[0]],
    use_safetensors=True,
    variant='fp16',
)
###pipe_CN.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
pipe_CN.scheduler = DPMSolverSDEScheduler.from_pretrained(
    "SG161222/RealVisXL_V5.0", subfolder="scheduler", use_karras_sigmas=True
)

pipe_CN.to(device)

# Load the "cases" LoRA into the UNet and text encoder under separate adapter
# names so each part can be weighted independently in set_adapters below.
state_dict, network_alphas = StableDiffusionXLControlNetPipeline.lora_state_dict(
    'CreativesCombined/hb8_cases_dreambooth_lora_test_1_14',
    weight_name='pytorch_lora_weights.safetensors',
)
pipe_CN.load_lora_into_unet(state_dict, network_alphas, pipe_CN.unet, adapter_name='unet_cases')
pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder, adapter_name='text_cases')
# NOTE(review): this call targets pipe_CN.text_encoder with prefix='2'; it looks
# like it was meant for pipe_CN.text_encoder_2 with prefix='text_encoder_2'.
# Left unchanged because it alters generated output -- confirm against the
# diffusers LoRA loader before editing.
pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder, prefix='2', adapter_name='text_2_cases')
pipe_CN.set_adapters(["unet_cases", "text_cases", "text_2_cases"], adapter_weights=[1.0, 0.5, 0.5])

# The refiner shares the second text encoder and the VAE with pipe_CN.
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_CN.text_encoder_2,
    vae=pipe_CN.vae,
    torch_dtype=torch_dtype,
    use_safetensors=True,
    variant="fp16",
)
refiner.to(device)

# Inpainting pipeline controlled by both the depth and the canny ControlNet.
pipe_IN = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    controlnet=controlnets,
    torch_dtype=torch_dtype,
    variant="fp16",
)
pipe_IN.load_lora_weights(
    'Tonioesparza/ourhood_training_dreambooth_lora_2_0',
    weight_name='pytorch_lora_weights.safetensors',
    adapter_name='ourhood',
)
# Moved to the device once, after the LoRA weights are attached (the original
# called .to("cuda") both before and after loading them).
pipe_IN.to(device)

def make_inpaint_condition(image, image_mask):
    """Build a control tensor in which masked pixels are set to -1.

    Args:
        image: Object with a ``convert("RGB")`` method whose result
            ``np.array`` can turn into an (H, W, 3) array of 0-255 values
            (e.g. a PIL image).
        image_mask: Object with a ``convert("L")`` method yielding an (H, W)
            array of 0-255 values; pixels above 50% intensity count as masked.

    Returns:
        A float32 ``torch.Tensor`` of shape (1, 3, H, W). Unmasked pixels are
        scaled to [0, 1]; masked pixels are -1.0 in every channel.

    Raises:
        ValueError: If image and mask differ in height or width.
    """
    pixels = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0

    # Compare height AND width. The original compared shape[0:1] (height only)
    # inside an assert, so a width mismatch reached the boolean indexing below
    # and failed there with a confusing error.
    if pixels.shape[:2] != mask.shape[:2]:
        raise ValueError(
            f"image and image_mask must have the same size, got "
            f"{pixels.shape[:2]} and {mask.shape[:2]}"
        )

    pixels[mask > 0.5] = -1.0  # set as masked pixel
    # HWC -> NCHW with a leading batch dimension of 1.
    pixels = np.expand_dims(pixels, 0).transpose(0, 3, 1, 2)
    return torch.from_numpy(pixels)

def ourhood_inference(prompt: str = "", num_inference_steps: int = 35, scaffold: int = 1, seed: int = 0):
    """Render the Ourhood booth inside the setting described by ``prompt``.

    The image is produced in three stages:

    1. ``pipe_CN`` denoises up to ``denoising_end=0.9`` and returns latents,
       guided by two control images: the scaffold's depth map and the same
       depth map with the masked region set to -1.
    2. ``refiner`` continues from ``denoising_start=0.9`` on those latents.
    3. ``pipe_IN`` inpaints the masked region with a fixed booth prompt,
       guided by the scaffold's depth and canny control images.

    Args:
        prompt: Description of the setting; inserted into a fixed prompt
            template for stages 1 and 2.
        num_inference_steps: Step count for stages 1 and 2 (stage 3 always
            uses 65 steps).
        scaffold: Which set of mask/depth/canny images to use (1, 2 or 3).
        seed: Seed passed to ``torch.manual_seed``.

    Returns:
        The first image of the ``pipe_IN`` output.

    Raises:
        KeyError: If ``scaffold`` is not one of the known scaffold ids.
    """
    # The original signature used the type objects themselves as defaults
    # (``prompt=str``); these are now annotations, with defaults that match the
    # UI's initial slider values. Values are coerced to int because the UI
    # sliders may deliver floats -- TODO confirm against the Gradio version in use.
    n_steps = int(num_inference_steps)
    scaffold = int(scaffold)
    seed = int(seed)

    ###pro_encode = pipe_CN.encode_text(prompt)[2]

    asset_root = "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/"
    scaff_dic = {
        1: {
            'mask1': asset_root + "mask_in_square_2.png",
            'depth_image': asset_root + "mask_depth_noroof_square.png",
            'canny_image': asset_root + "mask_depth_solo_square.png",
        },
        2: {
            'mask1': asset_root + "mask_in_C.png",
            'depth_image': asset_root + "depth_C.png",
            'canny_image': asset_root + "canny_C_solo.png",
        },
        3: {
            'mask1': asset_root + "mask_in_B.png",
            'depth_image': asset_root + "depth_B.png",
            'canny_image': asset_root + "canny_B_solo.png",
        },
    }

    try:
        assets = scaff_dic[scaffold]
    except KeyError:
        raise KeyError(
            f"unknown scaffold {scaffold!r}; expected one of {sorted(scaff_dic)}"
        ) from None

    ###pipe_CN.fuse_lora()

    # Precomputed mask and control images for the chosen scaffold.
    mask1 = load_image(assets['mask1'])
    depth_image = load_image(assets['depth_image'])
    canny_image = load_image(assets['canny_image'])

    # Depth map with the masked region blanked to -1; second control image of stage 1.
    masked_depth = make_inpaint_condition(depth_image, mask1)

    images_CN = [depth_image, canny_image]

    prompt1 = 'A frontpage still-life photograph, an 8-foot wooden crate, ' + prompt + ' in the style of hb8 interior architecture'
    neg1 = 'text,watermark'
    prompt2 = 'Photorealistic rendering, of an OurHood privacy booth, with a silken oak frame, hickory stained melange polyester fabric, windows'
    neg2 = 'curtains, pillows'
    # torch.manual_seed seeds the global RNG and returns the generator that is
    # shared by stages 1 and 3.
    generator = torch.manual_seed(seed)

    # Stage 1: base generation, stopped at 90% and returned as latents.
    results = pipe_CN(
        prompt=prompt1,
        ###ip_adapter_image=ip_images,
        negative_prompt=neg1,
        num_inference_steps=n_steps,
        num_images_per_prompt=1,
        generator=generator,
        denoising_end=0.9,
        image=[depth_image, masked_depth],
        output_type="latent",
        control_guidance_start=[0.0, 0.5],
        control_guidance_end=[0.5, 1.0],
        controlnet_conditioning_scale=[0.5, 1.0],
    ).images[0]

    # Stage 2: the refiner picks up where stage 1 stopped.
    image = refiner(
        prompt=prompt1,
        num_inference_steps=n_steps,
        denoising_start=0.9,
        image=results,
    ).images[0]

    # Stage 3: repaint the masked region with the booth prompt.
    image = pipe_IN(
        prompt=prompt2,
        negative_prompt=neg2,
        image=image,
        mask_image=mask1,
        num_inference_steps=65,
        strength=1.0,
        control_guidance_end=[0.9, 0.9],
        controlnet_conditioning_scale=[0.35, 0.65],
        control_image=images_CN,
        generator=generator,
    ).images[0]

    return image




"""
image = refiner(
    prompt=prompt,
    num_inference_steps=40,
    denoising_start=0.8,
    image=image,
).images[0]
"""

#@spaces.GPU #[uncomment to use ZeroGPU]

# Example setting prompts offered below the prompt box in the UI.
examples = [
    "in a British museum, pavillion, masonry, high-tables and chairs",
    "in a high ceilinged atrium, glass front, plantwalls, concrete floor, furniture, golden hour",
    "in a colorful open office environment",
    " in a Nordic atrium environment",
]

# Stylesheet passed to gr.Blocks: centres the main column and caps its width.
css = (
    "\n"
    "#col-container {\n"
    "    margin: 0 auto;\n"
    "    max-width: 640px;\n"
    "}\n"
)

# Build the Gradio UI: a prompt box with a Run button, the result image, and a
# collapsed accordion holding the perspective, seed and step-count sliders.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # HB8-Ourhood inference test
        """)

        with gr.Row():
            prompt = gr.Text(
                placeholder="Where do you want to show the Ourhood pod?",
                label="Setting prompt",
                max_lines=1,
                show_label=False,
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            # Selects which precomputed mask/depth/canny set ourhood_inference uses.
            perspective = gr.Slider(minimum=1, maximum=3, step=1, value=1, label="perspective")
            seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="tracking number (seed)")

            with gr.Row():
                num_inference_steps = gr.Slider(
                    minimum=35,
                    maximum=50,
                    step=1,
                    value=35,  # Replace with defaults that work for your model
                    label="Number of inference steps",
                )

        gr.Examples(examples=examples, inputs=[prompt])

    # Clicking Run or submitting the prompt both start inference. The input
    # order matches ourhood_inference(prompt, num_inference_steps, scaffold, seed).
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=ourhood_inference,
        inputs=[prompt, num_inference_steps, perspective, seed],
        outputs=[result],
    )

demo.queue().launch()