"""Gradio demo: outpaint an uploaded image with a fine-tuned diffusion pipeline."""

import functools
import os

import gradio as gr
import numpy as np
import torch
import torchvision.transforms as transforms
import yaml
from diffusers import (
    AutoPipelineForInpainting,
)
from munch import munchify
from torchvision.transforms import functional as F

from generate_dataset import outpainting_generator_rectangle

# Locations of the sampling config and of the fine-tuned checkpoint.
CONFIG_PATH = "/data0/kendong/Diffusions/zero123-live/configs/imagereward_train_configs.yaml"
MODEL_PATH = (
    '/data1/kendong/joint-rl-diffusion/alignment_log/'
    'exp_reward_group_regression_all_1w_1.6boundary/iteration_2560'
)

# Converts a (C, H, W) tensor into a 512x512 PIL image.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((512, 512), interpolation=F.InterpolationMode.LANCZOS),
])


@functools.lru_cache(maxsize=1)
def _load_config():
    """Read the YAML config once and wrap it for attribute-style access."""
    with open(CONFIG_PATH, encoding="utf-8") as file:
        return munchify(yaml.safe_load(file))


@functools.lru_cache(maxsize=1)
def _load_pipeline():
    """Load the inpainting pipeline once and place it on the GPU if available."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    pipe = AutoPipelineForInpainting.from_pretrained(MODEL_PATH)
    return pipe.to(device)


def pref_inpainting(image,
                    box_width_ratio,
                    mask_random_start,
                    steps,
                    ):
    """Mask a rectangle of ``image`` and let the diffusion pipeline fill it in.

    Args:
        image: Input image as delivered by the Gradio ``Image`` component.
        box_width_ratio: Mask box width in percent; divided by 100 before
            being handed to ``outpainting_generator_rectangle``.
        mask_random_start: Forwarded unchanged to
            ``outpainting_generator_rectangle``.
        steps: Number of denoising steps for the pipeline call.

    Returns:
        A ``(masked_input, result)`` pair: the 512x512 PIL image fed to the
        pipeline and the first image the pipeline produced.
    """
    # Loading is cached, so only the first request pays for disk I/O and the
    # device transfer.
    config = _load_config()
    pipe = _load_pipeline()

    # NOTE(review): the helper presumably returns (masked colour image, mask);
    # the mask must support ``.convert`` -- confirm against generate_dataset.
    color, mask = outpainting_generator_rectangle(
        image, box_width_ratio / 100, mask_random_start)
    mask = mask.convert('L')

    # ``transform`` expects channel-first tensors: HWC -> CHW for the colour
    # image, and a leading channel axis for the single-channel mask.
    color_chw = np.array(color).transpose(2, 0, 1)
    mask_chw = np.array(mask)[None, ...]

    # The tensors stay on the CPU: they are turned straight back into PIL
    # images, and the pipeline moves its own inputs to its device.
    color = transform(torch.from_numpy(color_chw))
    mask = transform(torch.from_numpy(mask_chw))

    # The grayscale mask is passed un-thresholded.
    # NOTE(review): the pipeline's mask preprocessing is expected to binarize
    # it -- confirm for the installed diffusers version.
    # ``num_inference_steps`` belongs to the sampling call, not the loader;
    # the slider may deliver a float, hence the int() coercion.
    res = pipe(
        prompt='',
        image=color,
        mask_image=mask,
        num_inference_steps=int(steps),
        eta=config.eta,
    ).images[0]
    return color, res


inputs = [
    gr.Image(type="pil", image_mode="RGBA", label='Input Image'),
    gr.Slider(30, 45, value=35, step=1, label="box_width_ratio"),
    gr.Slider(0, 256, value=125, step=1, label="mask_random_start"),
    gr.Slider(30, 100, value=50, step=5, label="steps"),
]

outputs = [
    gr.Image(type="pil", image_mode="RGBA", label='Mask RGB Image', container=True, width="65%"),
    gr.Image(type="pil", image_mode="RGBA", label='Results', container=True, width="65%"),
]

# Each row: image path, box_width_ratio, mask_random_start, steps.
examples = [
    ["/data0/kendong/Diffusions/zero123-live/test_demo/assets/ILSVRC2012_test_00000003.JPEG", 35, 125, 50],
    ["/data0/kendong/Diffusions/zero123-live/test_demo/assets/ILSVRC2012_test_00000181.JPEG", 35, 125, 50],
    ["/data0/kendong/Diffusions/zero123-live/test_demo/assets/ILSVRC2012_test_00002334.JPEG", 35, 125, 50],
    ["/data0/kendong/Diffusions/zero123-live/test_demo/assets/ILSVRC2012_test_00002613.JPEG", 35, 125, 50],
]

iface = gr.Interface(
    fn=pref_inpainting,
    inputs=inputs,
    outputs=outputs,
    title="Inpainting with Human Preference (Utilizing Free CPU Resources)",
    description="Upload an image and start your inpainting (currently only supporting outpainting masks; other mask types coming soon).",
    theme="default",
    examples=examples,
    allow_flagging="never",
)

iface.launch(share=True)