Commit a0efccd (parent: fb6e008): modify inference
Files changed:
- examples/examples.py            +154 -2
- infer_fft.py                    +178 -0
- infer_lora.py                   +228 -0
- inference/__init__.py           +2 -0
- inference/ace_plus_diffusers.py +7 -3
- inference/ace_plus_inference.py +83 -0
- inference/registry.py           +228 -0
- inference/utils.py              +38 -11
examples/examples.py  CHANGED

@@ -2,9 +2,9 @@ all_examples = [
     {
         "input_image": None,
         "input_mask": None,
-        "input_reference_image": "assets/samples/portrait/
+        "input_reference_image": "assets/samples/portrait/human_1.jpg",
         "save_path": "examples/outputs/portrait_human_1.jpg",
-        "instruction": "
+        "instruction": "Maintain the facial features, A girl is wearing a neat police uniform and sporting a badge. She is smiling with a friendly and confident demeanor. The background is blurred, featuring a cartoon logo.",
         "output_h": 1024,
         "output_w": 1024,
         "seed": 4194866942,
@@ -78,4 +78,156 @@ all_examples = [
         "edit_type": "repainting"
     }
 
-]
+]
+
+fft_examples = [
+    {
+        "input_image": None,
+        "input_mask": None,
+        "input_reference_image": "./assets/samples/portrait/human_1.jpg",
+        "save_path": "examples/outputs/portrait_human_1.jpg",
+        "instruction": "Maintain the facial features, A girl is wearing a neat police uniform and sporting a badge. She is smiling with a friendly and confident demeanor. The background is blurred, featuring a cartoon logo.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 10000000,
+        "repainting_scale": 1.0,
+        "edit_type": "repainting"
+    },
+    {
+        "input_image": None,
+        "input_mask": None,
+        "input_reference_image": "./assets/samples/subject/subject_1.jpg",
+        "save_path": "examples/outputs/subject_subject_1.jpg",
+        "instruction": "Display the logo in a minimalist style printed in white on a matte black ceramic coffee mug, alongside a steaming cup of coffee on a cozy cafe table.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 10000000,
+        "repainting_scale": 1.0,
+        "edit_type": "repainting"
+    },
+    {
+        "input_image": "./assets/samples/application/photo_editing/1_2_edit.jpg",
+        "input_mask": "./assets/samples/application/photo_editing/1_2_m.webp",
+        "input_reference_image": "./assets/samples/application/photo_editing/1_ref.png",
+        "save_path": "examples/outputs/photo_editing_1.jpg",
+        "instruction": "The item is put on the table.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 8006019,
+        "repainting_scale": 1.0,
+        "edit_type": "repainting"
+    },
+    {
+        "input_image": "./assets/samples/application/logo_paste/1_1_edit.png",
+        "input_mask": "./assets/samples/application/logo_paste/1_1_m.png",
+        "input_reference_image": "assets/samples/application/logo_paste/1_ref.png",
+        "save_path": "examples/outputs/logo_paste_1.jpg",
+        "instruction": "The logo is printed on the headphones.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 934582264,
+        "repainting_scale": 1.0,
+        "edit_type": "repainting"
+    },
+    {
+        "input_image": "./assets/samples/application/try_on/1_1_edit.png",
+        "input_mask": "./assets/samples/application/try_on/1_1_m.png",
+        "input_reference_image": "assets/samples/application/try_on/1_ref.png",
+        "save_path": "examples/outputs/try_on_1.jpg",
+        "instruction": "The woman dresses this skirt.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 934582264,
+        "repainting_scale": 1.0,
+        "edit_type": "repainting"
+    },
+    {
+        "input_image": "./assets/samples/portrait/human_1.jpg",
+        "input_mask": "assets/samples/application/movie_poster/1_2_m.webp",
+        "input_reference_image": "assets/samples/application/movie_poster/1_ref.png",
+        "save_path": "examples/outputs/movie_poster_1.jpg",
+        "instruction": "{image}, the man faces the camera.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 3999647,
+        "repainting_scale": 1.0,
+        "edit_type": "repainting"
+    },
+    {
+        "input_image": "./assets/samples/application/sr/sr_tiger.png",
+        "input_mask": "./assets/samples/application/sr/sr_tiger_m.webp",
+        "input_reference_image": None,
+        "save_path": "examples/outputs/mario_recolorizing_1.jpg",
+        "instruction": "{image} features a close-up of a young, furry tiger cub on a rock. The tiger, which appears to be quite young, has distinctive orange, "
+                       "black, and white striped fur, typical of tigers. The cub's eyes have a bright and curious expression, and its ears are perked up, "
+                       "indicating alertness. The cub seems to be in the act of climbing or resting on the rock. The background is a blurred grassland with trees, "
+                       "but the focus is on the cub, which is vividly colored while the rest of the image is in grayscale, drawing attention to the tiger's details."
+                       " The photo captures a moment in the wild, depicting the charming and tenacious nature of this young tiger,"
+                       " as well as its typical interaction with the environment.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 199999,
+        "repainting_scale": 0.0,
+        "edit_type": "no_preprocess"
+    },
+    {
+        "input_image": "./assets/samples/application/photo_editing/1_ref.png",
+        "input_mask": "./assets/samples/application/photo_editing/1_1_orm.webp",
+        "input_reference_image": None,
+        "save_path": "examples/outputs/mario_repainting_1.jpg",
+        "instruction": "a blue hand",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 63401,
+        "repainting_scale": 1.0,
+        "edit_type": "repainting"
+    },
+    {
+        "input_image": "./assets/samples/application/photo_editing/1_ref.png",
+        "input_mask": "./assets/samples/application/photo_editing/1_1_rm.webp",
+        "input_reference_image": None,
+        "save_path": "examples/outputs/mario_repainting_2.jpg",
+        "instruction": "Mechanical hands like a robot",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 59107,
+        "repainting_scale": 1.0,
+        "edit_type": "repainting"
+    },
+    {
+        "input_image": "./assets/samples/control/1_1.webp",
+        "input_mask": "./assets/samples/control/1_1_m.webp",
+        "input_reference_image": None,
+        "save_path": "examples/outputs/control_recolorizing.jpg",
+        "instruction": "{image} Beautiful female portrait, Robot with smooth White transparent carbon shell, rococo detailing, Natural lighting, Highly detailed, Cinematic, 4K.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 9652101,
+        "repainting_scale": 0.0,
+        "edit_type": "recolorizing"
+    },
+    {
+        "input_image": "./assets/samples/control/1_1.webp",
+        "input_mask": "./assets/samples/control/1_1_m.webp",
+        "input_reference_image": None,
+        "save_path": "examples/outputs/control_depth.jpg",
+        "instruction": "{image} Beautiful female portrait, Robot with smooth White transparent carbon shell, rococo detailing, Natural lighting, Highly detailed, Cinematic, 4K.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 14979476,
+        "repainting_scale": 0.0,
+        "edit_type": "depth_repainting"
+    },
+    {
+        "input_image": "./assets/samples/control/1_1.webp",
+        "input_mask": "./assets/samples/control/1_1_m.webp",
+        "input_reference_image": None,
+        "save_path": "examples/outputs/control_contour.jpg",
+        "instruction": "{image} Beautiful female portrait, Robot with smooth White transparent carbon shell, rococo detailing, Natural lighting, Highly detailed, Cinematic, 4K.",
+        "output_h": 1024,
+        "output_w": 1024,
+        "seed": 4227292472,
+        "repainting_scale": 0.0,
+        "edit_type": "contour_repainting"
+    }
+]
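All of the entries above share one flat schema that the driver scripts unpack as keyword arguments; keys a driver does not consume (for example edit_type in the FFT driver) simply fall through **kwargs of run_one_case(). A minimal sketch of appending a custom case, where the save path and instruction are placeholders rather than assets shipped with this commit:

# Hypothetical extra entry; the keys mirror the dicts defined in examples/examples.py.
from examples.examples import fft_examples

custom_case = {
    "input_image": None,                  # nothing to edit: pure reference-guided generation
    "input_mask": None,
    "input_reference_image": "./assets/samples/portrait/human_1.jpg",
    "save_path": "examples/outputs/custom_case.jpg",      # placeholder output path
    "instruction": "Maintain the facial features, a girl reading in a library.",  # placeholder prompt
    "output_h": 1024,
    "output_w": 1024,
    "seed": -1,                           # a negative seed lets the pipeline draw a random one
    "repainting_scale": 1.0,
    "edit_type": "repainting"
}
fft_examples.append(custom_case)          # infer_fft.py will then run it along with the others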
infer_fft.py  ADDED (new file, 178 lines)

# -*- coding: utf-8 -*-
# Copyright (c) Alibaba, Inc. and its affiliates.
import argparse
import glob
import importlib
import io
import os
import sys

from PIL import Image
from scepter.modules.transform.io import pillow_convert
from scepter.modules.utils.config import Config
from scepter.modules.utils.file_system import FS

if os.path.exists('__init__.py'):
    package_name = 'scepter_ext'
    spec = importlib.util.spec_from_file_location(package_name, '__init__.py')
    package = importlib.util.module_from_spec(spec)
    sys.modules[package_name] = package
    spec.loader.exec_module(package)

from examples.examples import fft_examples as all_examples
from inference.registry import INFERENCES

fs_list = [
    Config(cfg_dict={"NAME": "HuggingfaceFs", "TEMP_DIR": "./cache"}, load=False),
    Config(cfg_dict={"NAME": "ModelscopeFs", "TEMP_DIR": "./cache"}, load=False),
    Config(cfg_dict={"NAME": "HttpFs", "TEMP_DIR": "./cache"}, load=False),
    Config(cfg_dict={"NAME": "LocalFs", "TEMP_DIR": "./cache"}, load=False),
]

for one_fs in fs_list:
    FS.init_fs_client(one_fs)


def run_one_case(pipe,
                 input_image=None,
                 input_mask=None,
                 input_reference_image=None,
                 save_path="examples/output/example.png",
                 instruction="",
                 output_h=1024,
                 output_w=1024,
                 seed=-1,
                 sample_steps=None,
                 guide_scale=None,
                 repainting_scale=None,
                 use_change=True,
                 keep_pixels=True,
                 keep_pixels_rate=0.8,
                 **kwargs):
    if input_image is not None:
        input_image = Image.open(io.BytesIO(FS.get_object(input_image)))
        input_image = pillow_convert(input_image, "RGB")
    if input_mask is not None:
        input_mask = Image.open(io.BytesIO(FS.get_object(input_mask)))
        input_mask = pillow_convert(input_mask, "L")
    if input_reference_image is not None:
        input_reference_image = Image.open(io.BytesIO(FS.get_object(input_reference_image)))
        input_reference_image = pillow_convert(input_reference_image, "RGB")
    print(repainting_scale)
    image, _, _, _, seed = pipe(
        reference_image=input_reference_image,
        edit_image=input_image,
        edit_mask=input_mask,
        prompt=instruction,
        output_height=output_h,
        output_width=output_w,
        sampler='flow_euler',
        sample_steps=sample_steps or pipe.input.get("sample_steps", 28),
        guide_scale=guide_scale or pipe.input.get("guide_scale", 50),
        seed=seed,
        repainting_scale=repainting_scale,
        use_change=use_change,
        keep_pixels=keep_pixels,
        keep_pixels_rate=keep_pixels_rate
    )
    with FS.put_to(save_path) as local_path:
        image.save(local_path)
    return local_path, seed


def run():
    parser = argparse.ArgumentParser(description='Argparser for Scepter:\n')
    parser.add_argument('--instruction',
                        dest='instruction',
                        help='The instruction for editing or generating!',
                        default="")
    parser.add_argument('--output_h',
                        dest='output_h',
                        help='The height of output image for generation tasks!',
                        type=int,
                        default=1024)
    parser.add_argument('--output_w',
                        dest='output_w',
                        help='The width of output image for generation tasks!',
                        type=int,
                        default=1024)
    parser.add_argument('--input_reference_image',
                        dest='input_reference_image',
                        help='The input reference image!',
                        default=None)
    parser.add_argument('--input_image',
                        dest='input_image',
                        help='The input image!',
                        default=None)
    parser.add_argument('--input_mask',
                        dest='input_mask',
                        help='The input mask!',
                        default=None)
    parser.add_argument('--save_path',
                        dest='save_path',
                        help='The save path for output image!',
                        default='examples/output_images/output.png')
    parser.add_argument('--seed',
                        dest='seed',
                        help='The seed for generation!',
                        type=int,
                        default=-1)
    parser.add_argument('--step',
                        dest='step',
                        help='The sample step for generation!',
                        type=int,
                        default=None)
    parser.add_argument('--guide_scale',
                        dest='guide_scale',
                        help='The guide scale for generation!',
                        type=int,
                        default=None)
    parser.add_argument('--repainting_scale',
                        dest='repainting_scale',
                        help='The repainting scale for content filling generation!',
                        type=int,
                        default=None)

    cfg = Config(load=True, parser_ins=parser)
    model_cfg = Config(load=True, cfg_file="config/ace_plus_fft.yaml")
    pipe = INFERENCES.build(model_cfg)

    if cfg.args.instruction == "" and cfg.args.input_image is None and cfg.args.input_reference_image is None:
        params = {
            "output_h": cfg.args.output_h,
            "output_w": cfg.args.output_w,
            "sample_steps": cfg.args.step,
            "guide_scale": cfg.args.guide_scale
        }
        # run examples
        for example in all_examples:
            example.update(params)
            local_path, seed = run_one_case(pipe, **example)
    else:
        params = {
            "input_image": cfg.args.input_image,
            "input_mask": cfg.args.input_mask,
            "input_reference_image": cfg.args.input_reference_image,
            "save_path": cfg.args.save_path,
            "instruction": cfg.args.instruction,
            "output_h": cfg.args.output_h,
            "output_w": cfg.args.output_w,
            "sample_steps": cfg.args.step,
            "guide_scale": cfg.args.guide_scale,
            "repainting_scale": cfg.args.repainting_scale,
        }
        local_path, seed = run_one_case(pipe, **params)
        print(local_path, seed)


if __name__ == '__main__':
    run()
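The script can also be driven programmatically instead of through argparse. A minimal sketch, assuming it is run from the repository root so that config/ace_plus_fft.yaml and the sample assets resolve; the smoke-test save path is a placeholder:

# Minimal programmatic use of the FFT inference path defined above.
from scepter.modules.utils.config import Config
from examples.examples import fft_examples
from inference.registry import INFERENCES
from infer_fft import run_one_case   # reuses the helper defined in this file

# Build whichever inference class the YAML names (expected to be ACEInference).
model_cfg = Config(load=True, cfg_file="config/ace_plus_fft.yaml")
pipe = INFERENCES.build(model_cfg)

# Run just the first bundled example instead of looping over the whole list.
case = dict(fft_examples[0])
case.update({"save_path": "examples/outputs/smoke_test.jpg",
             "sample_steps": 28,
             "guide_scale": 50})
local_path, seed = run_one_case(pipe, **case)
print(local_path, seed)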
infer_lora.py  ADDED (new file, 228 lines)

# -*- coding: utf-8 -*-
# Copyright (c) Alibaba, Inc. and its affiliates.
import argparse
import glob
import io
import os

from PIL import Image
from scepter.modules.transform.io import pillow_convert
from scepter.modules.utils.config import Config
from scepter.modules.utils.file_system import FS

from examples.examples import all_examples
from inference.ace_plus_diffusers import ACEPlusDiffuserInference

inference_dict = {
    "ACE_DIFFUSER_PLUS": ACEPlusDiffuserInference
}

fs_list = [
    Config(cfg_dict={"NAME": "HuggingfaceFs", "TEMP_DIR": "./cache"}, load=False),
    Config(cfg_dict={"NAME": "ModelscopeFs", "TEMP_DIR": "./cache"}, load=False),
    Config(cfg_dict={"NAME": "HttpFs", "TEMP_DIR": "./cache"}, load=False),
    Config(cfg_dict={"NAME": "LocalFs", "TEMP_DIR": "./cache"}, load=False),
]

for one_fs in fs_list:
    FS.init_fs_client(one_fs)


def run_one_case(pipe,
                 input_image=None,
                 input_mask=None,
                 input_reference_image=None,
                 save_path="examples/output/example.png",
                 instruction="",
                 output_h=1024,
                 output_w=1024,
                 seed=-1,
                 sample_steps=None,
                 guide_scale=None,
                 repainting_scale=None,
                 model_path=None,
                 **kwargs):
    if input_image is not None:
        input_image = Image.open(io.BytesIO(FS.get_object(input_image)))
        input_image = pillow_convert(input_image, "RGB")
    if input_mask is not None:
        input_mask = Image.open(io.BytesIO(FS.get_object(input_mask)))
        input_mask = pillow_convert(input_mask, "L")
    if input_reference_image is not None:
        input_reference_image = Image.open(io.BytesIO(FS.get_object(input_reference_image)))
        input_reference_image = pillow_convert(input_reference_image, "RGB")

    image, seed = pipe(
        reference_image=input_reference_image,
        edit_image=input_image,
        edit_mask=input_mask,
        prompt=instruction,
        output_height=output_h,
        output_width=output_w,
        sampler='flow_euler',
        sample_steps=sample_steps or pipe.input.get("sample_steps", 28),
        guide_scale=guide_scale or pipe.input.get("guide_scale", 50),
        seed=seed,
        repainting_scale=repainting_scale or pipe.input.get("repainting_scale", 1.0),
        lora_path=model_path
    )
    with FS.put_to(save_path) as local_path:
        image.save(local_path)
    return local_path, seed


def run():
    parser = argparse.ArgumentParser(description='Argparser for Scepter:\n')
    parser.add_argument('--instruction',
                        dest='instruction',
                        help='The instruction for editing or generating!',
                        default="")
    parser.add_argument('--output_h',
                        dest='output_h',
                        help='The height of output image for generation tasks!',
                        type=int,
                        default=1024)
    parser.add_argument('--output_w',
                        dest='output_w',
                        help='The width of output image for generation tasks!',
                        type=int,
                        default=1024)
    parser.add_argument('--input_reference_image',
                        dest='input_reference_image',
                        help='The input reference image!',
                        default=None)
    parser.add_argument('--input_image',
                        dest='input_image',
                        help='The input image!',
                        default=None)
    parser.add_argument('--input_mask',
                        dest='input_mask',
                        help='The input mask!',
                        default=None)
    parser.add_argument('--save_path',
                        dest='save_path',
                        help='The save path for output image!',
                        default='examples/output_images/output.png')
    parser.add_argument('--seed',
                        dest='seed',
                        help='The seed for generation!',
                        type=int,
                        default=-1)
    parser.add_argument('--step',
                        dest='step',
                        help='The sample step for generation!',
                        type=int,
                        default=None)
    parser.add_argument('--guide_scale',
                        dest='guide_scale',
                        help='The guide scale for generation!',
                        type=int,
                        default=None)
    parser.add_argument('--repainting_scale',
                        dest='repainting_scale',
                        help='The repainting scale for content filling generation!',
                        type=int,
                        default=None)
    parser.add_argument('--task_type',
                        dest='task_type',
                        choices=['portrait', 'subject', 'local_editing'],
                        help="Choose the task type.",
                        default='')
    parser.add_argument('--task_model',
                        dest='task_model',
                        help='The models list for different tasks!',
                        default="./models/model_zoo.yaml")
    parser.add_argument('--infer_type',
                        dest='infer_type',
                        choices=['diffusers'],
                        default='diffusers',
                        help="Choose the inference scripts. 'native' refers to using the official implementation of ace++, "
                             "while 'diffusers' refers to using the adaptation for diffusers")
    parser.add_argument('--cfg_folder',
                        dest='cfg_folder',
                        help='The inference config!',
                        default="./config")

    cfg = Config(load=True, parser_ins=parser)

    model_yamls = glob.glob(os.path.join(cfg.args.cfg_folder, '*.yaml'))
    model_choices = dict()
    for i in model_yamls:
        model_cfg = Config(load=True, cfg_file=i)
        model_name = model_cfg.NAME
        model_choices[model_name] = model_cfg

    if cfg.args.infer_type == "native":
        infer_name = "ace_plus_native_infer"
    elif cfg.args.infer_type == "diffusers":
        infer_name = "ace_plus_diffuser_infer"
    else:
        raise ValueError("infer_type should be native or diffusers")

    assert infer_name in model_choices

    # choose different model
    task_model_cfg = Config(load=True, cfg_file=cfg.args.task_model)

    task_model_dict = {}
    for task_name, task_model in task_model_cfg.MODEL.items():
        task_model_dict[task_name] = task_model

    # choose the inference scripts.
    pipe_cfg = model_choices[infer_name]
    infer_name = pipe_cfg.get("INFERENCE_TYPE", "ACE_PLUS")
    pipe = inference_dict[infer_name]()
    pipe.init_from_cfg(pipe_cfg)

    if cfg.args.instruction == "" and cfg.args.input_image is None and cfg.args.input_reference_image is None:
        params = {
            "output_h": cfg.args.output_h,
            "output_w": cfg.args.output_w,
            "sample_steps": cfg.args.step,
            "guide_scale": cfg.args.guide_scale
        }
        # run examples
        for example in all_examples:
            example["model_path"] = FS.get_from(task_model_dict[example["task_type"].upper()]["MODEL_PATH"])
            example.update(params)
            if example["edit_type"] == "repainting":
                example["repainting_scale"] = 1.0
            else:
                example["repainting_scale"] = task_model_dict[example["task_type"].upper()].get("REPAINTING_SCALE", 1.0)
            print(example)
            local_path, seed = run_one_case(pipe, **example)
    else:
        assert cfg.args.task_type.upper() in task_model_cfg
        params = {
            "input_image": cfg.args.input_image,
            "input_mask": cfg.args.input_mask,
            "input_reference_image": cfg.args.input_reference_image,
            "save_path": cfg.args.save_path,
            "instruction": cfg.args.instruction,
            "output_h": cfg.args.output_h,
            "output_w": cfg.args.output_w,
            "sample_steps": cfg.args.step,
            "guide_scale": cfg.args.guide_scale,
            "repainting_scale": cfg.args.repainting_scale,
            "model_path": FS.get_from(task_model_dict[cfg.args.task_type.upper()]["MODEL_PATH"])
        }
        local_path, seed = run_one_case(pipe, **params)
        print(local_path, seed)


if __name__ == '__main__':
    run()
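The per-task LoRA selection above reduces to a small lookup against models/model_zoo.yaml. A sketch of that lookup in isolation; the "PORTRAIT" key and the exact YAML layout are inferred from the accesses in run() and are not shipped in this diff:

# Sketch of how infer_lora.py resolves a per-task LoRA and its repainting scale.
from scepter.modules.utils.config import Config
from scepter.modules.utils.file_system import FS

task_model_cfg = Config(load=True, cfg_file="./models/model_zoo.yaml")
task_model_dict = {name: model for name, model in task_model_cfg.MODEL.items()}

task_model = task_model_dict["PORTRAIT"]                  # "PORTRAIT" key is an assumption
lora_path = FS.get_from(task_model["MODEL_PATH"])         # fetch/cache the LoRA weights locally
repainting_scale = task_model.get("REPAINTING_SCALE", 1.0)
print(lora_path, repainting_scale)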
inference/__init__.py  CHANGED
@@ -0,0 +1,2 @@
+from .ace_plus_diffusers import ACEPlusDiffuserInference
+from .ace_plus_inference import ACEInference
inference/ace_plus_diffusers.py  CHANGED

@@ -12,7 +12,6 @@ from scepter.modules.utils.logger import get_logger
 from transformers import T5TokenizerFast
 from .utils import ACEPlusImageProcessor
 
-
 class ACEPlusDiffuserInference():
     def __init__(self, logger=None):
         if logger is None:
@@ -39,7 +38,6 @@ class ACEPlusDiffuserInference():
         self.pipe.tokenizer_2 = tokenizer_2
         self.load_default(cfg.DEFAULT_PARAS)
 
-
     def prepare_input(self,
                       image,
                       mask,
@@ -88,7 +86,11 @@ class ACEPlusDiffuserInference():
         if isinstance(prompt, str):
             prompt = [prompt]
         seed = seed if seed >= 0 else random.randint(0, 2 ** 32 - 1)
-
+        # edit_image, edit_mask, change_image, content_image, out_h, out_w, slice_w
+        image, mask, _, _, out_h, out_w, slice_w = self.image_processor.preprocess(reference_image, edit_image, edit_mask,
+                                                                                   width=output_width,
+                                                                                   height=output_height,
+                                                                                   repainting_scale=repainting_scale)
         h, w = image.shape[1:]
         generator = torch.Generator("cpu").manual_seed(seed)
         masked_image_latents = self.prepare_input(image, mask,
@@ -98,6 +100,8 @@ class ACEPlusDiffuserInference():
         with FS.get_from(lora_path) as local_path:
             self.pipe.load_lora_weights(local_path)
 
+
+
         image = self.pipe(
             prompt=prompt,
             masked_image_latents=masked_image_latents,
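End to end, this diffusers-backed class is used exactly as run_one_case() in infer_lora.py uses it. A condensed sketch; the config file name and the LoRA path below are placeholders, not values fixed by this commit:

# Condensed sketch of driving ACEPlusDiffuserInference directly.
from PIL import Image
from scepter.modules.utils.config import Config
from inference.ace_plus_diffusers import ACEPlusDiffuserInference

pipe = ACEPlusDiffuserInference()
pipe.init_from_cfg(Config(load=True, cfg_file="./config/ace_plus_diffuser_infer.yaml"))  # assumed file name

reference = Image.open("./assets/samples/portrait/human_1.jpg").convert("RGB")
image, seed = pipe(
    reference_image=reference,
    edit_image=None,
    edit_mask=None,
    prompt="Maintain the facial features, a girl wearing a police uniform.",
    output_height=1024,
    output_width=1024,
    sampler='flow_euler',
    sample_steps=28,
    guide_scale=50,
    seed=-1,
    repainting_scale=1.0,
    lora_path="path/to/ace_plus_portrait_lora.safetensors",  # placeholder; resolved from model_zoo.yaml in practice
)
image.save("examples/outputs/diffusers_demo.jpg")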
inference/ace_plus_inference.py  ADDED (new file, 83 lines)

# -*- coding: utf-8 -*-
# Copyright (c) Alibaba, Inc. and its affiliates.
import random
from collections import OrderedDict

import torch, numpy as np
from PIL import Image
from scepter.modules.model.registry import MODELS
from scepter.modules.utils.config import Config
from scepter.modules.utils.distribute import we
from .registry import BaseInference, INFERENCES
from .utils import ACEPlusImageProcessor


@INFERENCES.register_class()
class ACEInference(BaseInference):
    '''
    reuse the ldm code
    '''
    def __init__(self, cfg, logger=None):
        super().__init__(cfg, logger)
        self.pipe = MODELS.build(cfg.MODEL, logger=self.logger).eval().to(we.device_id)
        self.image_processor = ACEPlusImageProcessor(max_seq_len=cfg.MAX_SEQ_LEN)
        self.input = {k.lower(): dict(v).get('DEFAULT', None) if isinstance(v, (dict, OrderedDict, Config)) else v
                      for k, v in cfg.SAMPLE_ARGS.items()}
        self.dtype = getattr(torch, cfg.get("DTYPE", "bfloat16"))

    @torch.no_grad()
    def __call__(self,
                 reference_image=None,
                 edit_image=None,
                 edit_mask=None,
                 prompt='',
                 edit_type=None,
                 output_height=1024,
                 output_width=1024,
                 sampler='flow_euler',
                 sample_steps=28,
                 guide_scale=50,
                 lora_path=None,
                 seed=-1,
                 repainting_scale=0,
                 use_change=False,
                 keep_pixels=False,
                 keep_pixels_rate=0.8,
                 **kwargs):
        # convert the input info to the input of ldm.
        if isinstance(prompt, str):
            prompt = [prompt]
        seed = seed if seed >= 0 else random.randint(0, 2 ** 24 - 1)
        image, mask, change_image, content_image, out_h, out_w, slice_w = self.image_processor.preprocess(
            reference_image, edit_image, edit_mask,
            height=output_height, width=output_width,
            repainting_scale=repainting_scale,
            keep_pixels=keep_pixels,
            keep_pixels_rate=keep_pixels_rate,
            use_change=use_change)
        change_image = [None] if change_image is None else [change_image.to(we.device_id)]
        image, mask = [image.to(we.device_id)], [mask.to(we.device_id)]

        (src_image_list, src_mask_list, modify_image_list,
         edit_id, prompt) = [image], [mask], [change_image], [[0]], [prompt]

        with torch.amp.autocast(enabled=True, dtype=self.dtype, device_type='cuda'):
            out_image = self.pipe(
                src_image_list=src_image_list,
                modify_image_list=modify_image_list,
                src_mask_list=src_mask_list,
                edit_id=edit_id,
                image=image,
                image_mask=mask,
                prompt=prompt,
                sampler='flow_euler',
                sample_steps=sample_steps,
                seed=seed,
                guide_scale=guide_scale,
                show_process=True,
            )
        imgs = [x_i['reconstruct_image'].float().permute(1, 2, 0).cpu().numpy()
                for x_i in out_image]
        imgs = [Image.fromarray((img * 255).astype(np.uint8)) for img in imgs]
        edit_image = Image.fromarray((torch.clamp(image[0] / 2 + 0.5, min=0.0, max=1.0) * 255).float().permute(1, 2, 0).cpu().numpy().astype(np.uint8))
        change_image = Image.fromarray((torch.clamp(change_image[0] / 2 + 0.5, min=0.0, max=1.0) * 255).float().permute(1, 2, 0).cpu().numpy().astype(np.uint8))
        mask = Image.fromarray((mask[0] * 255).squeeze(0).cpu().numpy().astype(np.uint8))
        return self.image_processor.postprocess(imgs[0], slice_w, out_w, out_h), edit_image, change_image, mask, seed
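Unlike the diffusers path, ACEInference returns five values (the final output image, the composed edit image, the change image, the mask, and the seed), which is why infer_fft.py unpacks image, _, _, _, seed. A small sketch of that contract, again assuming the repository root as the working directory and that config/ace_plus_fft.yaml names this class:

# Sketch: build ACEInference through the registry and unpack everything it returns.
from PIL import Image
from scepter.modules.utils.config import Config
from inference.registry import INFERENCES

pipe = INFERENCES.build(Config(load=True, cfg_file="config/ace_plus_fft.yaml"))

reference = Image.open("./assets/samples/portrait/human_1.jpg").convert("RGB")
out_image, edit_image, change_image, mask, seed = pipe(
    reference_image=reference,
    prompt="Maintain the facial features, a girl wearing a police uniform.",
    output_height=1024,
    output_width=1024,
    sample_steps=28,
    guide_scale=50,
    seed=-1,
    repainting_scale=1.0,
    use_change=True,
    keep_pixels=True,
    keep_pixels_rate=0.8,
)
out_image.save("examples/outputs/ace_inference_demo.jpg")
print("seed used:", seed)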
inference/registry.py  ADDED (new file, 228 lines)

# -*- coding: utf-8 -*-
# Copyright (c) Alibaba, Inc. and its affiliates.

import torch
from PIL.Image import Image
from collections import OrderedDict
from scepter.modules.utils.distribute import we
from scepter.modules.utils.config import Config
from scepter.modules.utils.logger import get_logger
from scepter.studio.utils.env import get_available_memory
from scepter.modules.model.registry import MODELS, BACKBONES, EMBEDDERS
from scepter.modules.utils.registry import Registry, build_from_config


def get_model(model_tuple):
    assert 'model' in model_tuple
    return model_tuple['model']


class BaseInference():
    '''
    Supports loading components dynamically: a module is created and loaded
    the first time it is actually run.
    '''
    def __init__(self, cfg, logger=None):
        if logger is None:
            logger = get_logger(name='scepter')
        self.logger = logger
        self.name = cfg.NAME

    def init_from_modules(self, modules):
        for k, v in modules.items():
            self.__setattr__(k, v)

    def infer_model(self, cfg, module_paras=None):
        module = {
            'model': None,
            'cfg': cfg,
            'device': 'offline',
            'name': cfg.NAME,
            'function_info': {},
            'paras': {}
        }
        if module_paras is None:
            return module
        function_info = {}
        paras = {
            k.lower(): v
            for k, v in module_paras.get('PARAS', {}).items()
        }
        for function in module_paras.get('FUNCTION', []):
            input_dict = {}
            for inp in function.get('INPUT', []):
                if inp.lower() in self.input:
                    input_dict[inp.lower()] = self.input[inp.lower()]
            function_info[function.NAME] = {
                'dtype': function.get('DTYPE', 'float32'),
                'input': input_dict
            }
        module['paras'] = paras
        module['function_info'] = function_info
        return module

    def init_from_ckpt(self, path, model, ignore_keys=list()):
        if path.endswith('safetensors'):
            from safetensors.torch import load_file as load_safetensors
            sd = load_safetensors(path)
        else:
            sd = torch.load(path, map_location='cpu', weights_only=True)

        new_sd = OrderedDict()
        for k, v in sd.items():
            ignored = False
            for ik in ignore_keys:
                if ik in k:
                    if we.rank == 0:
                        self.logger.info(
                            'Ignore key {} from state_dict.'.format(k))
                    ignored = True
                    break
            if not ignored:
                new_sd[k] = v

        missing, unexpected = model.load_state_dict(new_sd, strict=False)
        if we.rank == 0:
            self.logger.info(
                f'Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys'
            )
            if len(missing) > 0:
                self.logger.info(f'Missing Keys:\n {missing}')
            if len(unexpected) > 0:
                self.logger.info(f'\nUnexpected Keys:\n {unexpected}')

    def load(self, module):
        if module['device'] == 'offline':
            from scepter.modules.utils.import_utils import LazyImportModule
            if (LazyImportModule.get_module_type(('MODELS', module['cfg'].NAME)) or
                    module['cfg'].NAME in MODELS.class_map):
                model = MODELS.build(module['cfg'], logger=self.logger).eval()
            elif (LazyImportModule.get_module_type(('BACKBONES', module['cfg'].NAME)) or
                    module['cfg'].NAME in BACKBONES.class_map):
                model = BACKBONES.build(module['cfg'],
                                        logger=self.logger).eval()
            elif (LazyImportModule.get_module_type(('EMBEDDERS', module['cfg'].NAME)) or
                    module['cfg'].NAME in EMBEDDERS.class_map):
                model = EMBEDDERS.build(module['cfg'],
                                        logger=self.logger).eval()
            else:
                raise NotImplementedError
            if 'DTYPE' in module['cfg'] and module['cfg']['DTYPE'] is not None:
                model = model.to(getattr(torch, module['cfg'].DTYPE))
            if module['cfg'].get('RELOAD_MODEL', None):
                self.init_from_ckpt(module['cfg'].RELOAD_MODEL, model)
            module['model'] = model
            module['device'] = 'cpu'
        if module['device'] == 'cpu':
            module['device'] = we.device_id
            module['model'] = module['model'].to(we.device_id)
        return module

    def unload(self, module):
        if module is None:
            return module
        mem = get_available_memory()
        free_mem = int(mem['available'] / (1024**2))
        total_mem = int(mem['total'] / (1024**2))
        if free_mem < 0.5 * total_mem:
            if module['model'] is not None:
                module['model'] = module['model'].to('cpu')
                del module['model']
                module['model'] = None
                module['device'] = 'offline'
            print('delete module')
        else:
            if module['model'] is not None:
                module['model'] = module['model'].to('cpu')
                module['device'] = 'cpu'
            else:
                module['device'] = 'offline'
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
        return module

    def dynamic_load(self, module=None, name=''):
        self.logger.info('Loading {} model'.format(name))
        if name == 'all':
            for subname in self.loaded_model_name:
                self.loaded_model[subname] = self.dynamic_load(
                    getattr(self, subname), subname)
        elif name in self.loaded_model_name:
            if name in self.loaded_model:
                if module['cfg'] != self.loaded_model[name]['cfg']:
                    self.unload(self.loaded_model[name])
                    module = self.load(module)
                    self.loaded_model[name] = module
                    return module
                elif module['device'] == 'cpu' or module['device'] == 'offline':
                    module = self.load(module)
                    return module
                else:
                    return module
            else:
                module = self.load(module)
                self.loaded_model[name] = module
                return module
        else:
            return self.load(module)

    def dynamic_unload(self, module=None, name='', skip_loaded=False):
        self.logger.info('Unloading {} model'.format(name))
        if name == 'all':
            for name, module in self.loaded_model.items():
                module = self.unload(self.loaded_model[name])
                self.loaded_model[name] = module
        elif name in self.loaded_model_name:
            if name in self.loaded_model:
                if not skip_loaded:
                    module = self.unload(self.loaded_model[name])
                    self.loaded_model[name] = module
            else:
                self.unload(module)
        else:
            self.unload(module)

    def load_default(self, cfg):
        module_paras = {}
        if cfg is not None:
            self.paras = cfg.PARAS
            self.input_cfg = {k.lower(): v for k, v in cfg.INPUT.items()}
            self.input = {k.lower(): dict(v).get('DEFAULT', None) if isinstance(v, (dict, OrderedDict, Config)) else v for k, v in cfg.INPUT.items()}
            self.output = {k.lower(): v for k, v in cfg.OUTPUT.items()}
            module_paras = cfg.MODULES_PARAS
        return module_paras

    def load_image(self, image, num_samples=1):
        if isinstance(image, torch.Tensor):
            pass
        elif isinstance(image, Image):
            pass
        elif isinstance(image, Image):
            pass

    def get_function_info(self, module, function_name=None):
        all_function = module['function_info']
        if function_name in all_function:
            return function_name, all_function[function_name]['dtype']
        if function_name is None and len(all_function) == 1:
            for k, v in all_function.items():
                return k, v['dtype']

    @torch.no_grad()
    def __call__(self,
                 input,
                 **kwargs):
        return


def build_inference(cfg, registry, logger=None, *args, **kwargs):
    """ After building the model, load a pretrained model if the key `pretrain` exists.

    pretrain (str, dict): Describes how to load the pretrained model.
        str: treat pretrain as the model path;
        dict: should contain the key `path`, plus other parameters taken by load_pretrained();
    """
    if not isinstance(cfg, Config):
        raise TypeError(f'Config must be type dict, got {type(cfg)}')
    model = build_from_config(cfg, registry, logger=logger, *args, **kwargs)
    return model


# register cls for diffusion.
INFERENCES = Registry('INFERENCE', build_func=build_inference)
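The point of this new registry is that drivers like infer_fft.py never import a concrete class; they build whatever NAME the YAML config specifies. Registering another implementation therefore only takes a decorator. A sketch, where MyInference and its one-line config are hypothetical:

# Sketch: registering and building a custom inference wrapper via INFERENCES.
from scepter.modules.utils.config import Config
from inference.registry import BaseInference, INFERENCES


@INFERENCES.register_class()
class MyInference(BaseInference):
    """Trivial pass-through inference, just to exercise the registry."""

    def __init__(self, cfg, logger=None):
        super().__init__(cfg, logger)

    def __call__(self, input, **kwargs):
        return input


cfg = Config(cfg_dict={"NAME": "MyInference"}, load=False)
pipe = INFERENCES.build(cfg)      # dispatches through build_inference() above
print(pipe.name)                  # -> "MyInference"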
inference/utils.py  CHANGED

@@ -49,7 +49,10 @@ class ACEPlusImageProcessor():
                    edit_mask=None,
                    height=1024,
                    width=1024,
-                   repainting_scale = 1.0
+                   repainting_scale=1.0,
+                   keep_pixels=False,
+                   keep_pixels_rate=0.8,
+                   use_change=False):
         reference_image = self.image_check(reference_image)
         edit_image = self.image_check(edit_image)
         # for reference generation
@@ -57,8 +60,12 @@ class ACEPlusImageProcessor():
             edit_image = torch.zeros([3, height, width])
             edit_mask = torch.ones([1, height, width])
         else:
-            edit_mask
-
+            if edit_mask is None:
+                _, eH, eW = edit_image.shape
+                edit_mask = np.ones((eH, eW))
+            else:
+                edit_mask = np.asarray(edit_mask)
+                edit_mask = np.where(edit_mask > 128, 1, 0)
             edit_mask = edit_mask.astype(
                 np.float32) if np.any(edit_mask) else np.ones_like(edit_mask).astype(
                     np.float32)
@@ -71,12 +78,27 @@ class ACEPlusImageProcessor():
 
         assert edit_mask is not None
         if reference_image is not None:
-            # align height with edit_image
             _, H, W = reference_image.shape
             _, eH, eW = edit_image.shape
-
-
-
+            if not keep_pixels:
+                # align height with edit_image
+                scale = eH / H
+                tH, tW = eH, int(W * scale)
+                reference_image = T.Resize((tH, tW), interpolation=T.InterpolationMode.BILINEAR, antialias=True)(
+                    reference_image)
+            else:
+                # padding
+                if H >= keep_pixels_rate * eH:
+                    tH = int(eH * keep_pixels_rate)
+                    scale = tH / H
+                    tW = int(W * scale)
+                    reference_image = T.Resize((tH, tW), interpolation=T.InterpolationMode.BILINEAR, antialias=True)(
+                        reference_image)
+                rH, rW = reference_image.shape[-2:]
+                delta_w = 0
+                delta_h = eH - rH
+                padding = (delta_w // 2, delta_h // 2, delta_w - (delta_w // 2), delta_h - (delta_h // 2))
+                reference_image = T.Pad(padding, fill=0, padding_mode="constant")(reference_image)
             edit_image = torch.cat([reference_image, edit_image], dim=-1)
             edit_mask = torch.cat([torch.zeros([1, reference_image.shape[1], reference_image.shape[2]]), edit_mask], dim=-1)
             slice_w = reference_image.shape[-1]
@@ -89,16 +111,21 @@ class ACEPlusImageProcessor():
         rW = int(W * scale) // self.d * self.d
         slice_w = int(slice_w * scale) // self.d * self.d
 
-        edit_image = T.Resize((rH, rW), interpolation=T.InterpolationMode.
+        edit_image = T.Resize((rH, rW), interpolation=T.InterpolationMode.NEAREST_EXACT, antialias=True)(edit_image)
         edit_mask = T.Resize((rH, rW), interpolation=T.InterpolationMode.NEAREST_EXACT, antialias=True)(edit_mask)
-
-
+        content_image = edit_image
+        if use_change:
+            change_image = edit_image * edit_mask
+            edit_image = edit_image * (1 - edit_mask)
+        else:
+            change_image = None
+        return edit_image, edit_mask, change_image, content_image, out_h, out_w, slice_w
 
 
     def postprocess(self, image, slice_w, out_w, out_h):
         w, h = image.size
         if slice_w > 0:
-            output_image = image.crop((slice_w +
+            output_image = image.crop((slice_w + 30, 0, w, h))
             output_image = output_image.resize((out_w, out_h))
         else:
             output_image = image
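The reworked preprocess() can be exercised on its own with dummy images, which makes the new keep_pixels/use_change behaviour easy to inspect. A self-contained sketch; max_seq_len=4096 is an assumed value (ACEInference reads it from cfg.MAX_SEQ_LEN), and PIL inputs are assumed to be accepted because the driver scripts pass PIL images straight through:

# Sketch of the new preprocess() contract in inference/utils.py (no model weights needed).
from PIL import Image
from inference.utils import ACEPlusImageProcessor

processor = ACEPlusImageProcessor(max_seq_len=4096)    # max_seq_len value is an assumption

reference = Image.new("RGB", (512, 768), "white")      # reference is rescaled, or padded when keep_pixels=True
edit = Image.new("RGB", (1024, 1024), "gray")          # image being edited
mask = Image.new("L", (1024, 1024), 255)               # values > 128 become the repainting region

edit_image, edit_mask, change_image, content_image, out_h, out_w, slice_w = processor.preprocess(
    reference, edit, mask,
    height=1024, width=1024,
    repainting_scale=1.0,
    keep_pixels=True, keep_pixels_rate=0.8,
    use_change=True)

# The reference and the edit image are concatenated along width; slice_w records where
# the reference part ends so postprocess() can crop it back off (slice_w + 30, see above).
print(edit_image.shape, edit_mask.shape, out_h, out_w, slice_w)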