Spaces:
Running on Zero
Update ace_inference.py
Browse files — ace_inference.py (+15 lines, −9 lines)
ace_inference.py
CHANGED
@@ -92,6 +92,9 @@ class RefinerInference(DiffusionInference):
|
|
92 |
if cfg.MODEL.have('DIFFUSION') else None
|
93 |
self.max_seq_length = cfg.MODEL.get("MAX_SEQ_LENGTH", 4096)
|
94 |
assert self.diffusion is not None
|
|
|
|
|
|
|
95 |
|
96 |
@torch.no_grad()
|
97 |
def encode_first_stage(self, x, **kwargs):
|
@@ -153,7 +156,7 @@ class RefinerInference(DiffusionInference):
|
|
153 |
noise.append(noise_)
|
154 |
noise, x_shapes = pack_imagelist_into_tensor(noise)
|
155 |
if reverse_scale > 0:
|
156 |
-
self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
157 |
x_samples = [x.unsqueeze(0) for x in x_samples]
|
158 |
x_start = self.encode_first_stage(x_samples, **kwargs)
|
159 |
if use_dynamic_model: self.dynamic_unload(self.first_stage_model,
|
@@ -163,7 +166,7 @@ class RefinerInference(DiffusionInference):
|
|
163 |
else:
|
164 |
x_start = None
|
165 |
# cond stage
|
166 |
-
self.dynamic_load(self.cond_stage_model, 'cond_stage_model')
|
167 |
function_name, dtype = self.get_function_info(self.cond_stage_model)
|
168 |
with torch.autocast('cuda',
|
169 |
enabled=dtype == 'float16',
|
@@ -176,7 +179,7 @@ class RefinerInference(DiffusionInference):
|
|
176 |
skip_loaded=True)
|
177 |
|
178 |
|
179 |
-
self.dynamic_load(self.diffusion_model, 'diffusion_model')
|
180 |
# UNet use input n_prompt
|
181 |
function_name, dtype = self.get_function_info(
|
182 |
self.diffusion_model)
|
@@ -207,7 +210,7 @@ class RefinerInference(DiffusionInference):
|
|
207 |
if use_dynamic_model: self.dynamic_unload(self.diffusion_model,
|
208 |
'diffusion_model',
|
209 |
skip_loaded=True)
|
210 |
-
self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
211 |
x_samples = self.decode_first_stage(latent)
|
212 |
if use_dynamic_model: self.dynamic_unload(self.first_stage_model,
|
213 |
'first_stage_model',
|
@@ -279,6 +282,9 @@ class ACEInference(DiffusionInference):
|
|
279 |
self.size_factor = cfg.get('SIZE_FACTOR', 8)
|
280 |
self.decoder_bias = cfg.get('DECODER_BIAS', 0)
|
281 |
self.default_n_prompt = cfg.get('DEFAULT_N_PROMPT', '')
|
|
|
|
|
|
|
282 |
|
283 |
@torch.no_grad()
|
284 |
def encode_first_stage(self, x, **kwargs):
|
@@ -390,7 +396,7 @@ class ACEInference(DiffusionInference):
|
|
390 |
if use_ace and (not is_txt_image or refiner_scale <= 0):
|
391 |
ctx, null_ctx = {}, {}
|
392 |
# Get Noise Shape
|
393 |
-
self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
394 |
x = self.encode_first_stage(image)
|
395 |
if self.use_dynamic_model: self.dynamic_unload(self.first_stage_model,
|
396 |
'first_stage_model',
|
@@ -408,7 +414,7 @@ class ACEInference(DiffusionInference):
|
|
408 |
ctx['x_mask'] = null_ctx['x_mask'] = cond_mask
|
409 |
|
410 |
# Encode Prompt
|
411 |
-
self.dynamic_load(self.cond_stage_model, 'cond_stage_model')
|
412 |
function_name, dtype = self.get_function_info(self.cond_stage_model)
|
413 |
cont, cont_mask = getattr(get_model(self.cond_stage_model),
|
414 |
function_name)(prompt)
|
@@ -425,7 +431,7 @@ class ACEInference(DiffusionInference):
|
|
425 |
null_ctx['crossattn'] = null_cont
|
426 |
|
427 |
# Encode Edit Images
|
428 |
-
self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
429 |
edit_image = [to_device(i, strict=False) for i in edit_image]
|
430 |
edit_image_mask = [to_device(i, strict=False) for i in edit_image_mask]
|
431 |
e_img, e_mask = [], []
|
@@ -443,7 +449,7 @@ class ACEInference(DiffusionInference):
|
|
443 |
null_ctx['edit_mask'] = ctx['edit_mask'] = e_mask
|
444 |
|
445 |
# Diffusion Process
|
446 |
-
self.dynamic_load(self.diffusion_model, 'diffusion_model')
|
447 |
function_name, dtype = self.get_function_info(self.diffusion_model)
|
448 |
with torch.autocast('cuda',
|
449 |
enabled=dtype in ('float16', 'bfloat16'),
|
@@ -489,7 +495,7 @@ class ACEInference(DiffusionInference):
|
|
489 |
skip_loaded=False)
|
490 |
|
491 |
# Decode to Pixel Space
|
492 |
-
self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
493 |
samples = unpack_tensor_into_imagelist(latent, x_shapes)
|
494 |
x_samples = self.decode_first_stage(samples)
|
495 |
if self.use_dynamic_model: self.dynamic_unload(self.first_stage_model,
|
|
|
92 |
if cfg.MODEL.have('DIFFUSION') else None
|
93 |
self.max_seq_length = cfg.MODEL.get("MAX_SEQ_LENGTH", 4096)
|
94 |
assert self.diffusion is not None
|
95 |
+
self.dynamic_load(self.cond_stage_model, 'cond_stage_model')
|
96 |
+
self.dynamic_load(self.diffusion_model, 'diffusion_model')
|
97 |
+
self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
98 |
|
99 |
@torch.no_grad()
|
100 |
def encode_first_stage(self, x, **kwargs):
|
|
|
156 |
noise.append(noise_)
|
157 |
noise, x_shapes = pack_imagelist_into_tensor(noise)
|
158 |
if reverse_scale > 0:
|
159 |
+
if use_dynamic_model: self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
160 |
x_samples = [x.unsqueeze(0) for x in x_samples]
|
161 |
x_start = self.encode_first_stage(x_samples, **kwargs)
|
162 |
if use_dynamic_model: self.dynamic_unload(self.first_stage_model,
|
|
|
166 |
else:
|
167 |
x_start = None
|
168 |
# cond stage
|
169 |
+
if use_dynamic_model: self.dynamic_load(self.cond_stage_model, 'cond_stage_model')
|
170 |
function_name, dtype = self.get_function_info(self.cond_stage_model)
|
171 |
with torch.autocast('cuda',
|
172 |
enabled=dtype == 'float16',
|
|
|
179 |
skip_loaded=True)
|
180 |
|
181 |
|
182 |
+
if use_dynamic_model: self.dynamic_load(self.diffusion_model, 'diffusion_model')
|
183 |
# UNet use input n_prompt
|
184 |
function_name, dtype = self.get_function_info(
|
185 |
self.diffusion_model)
|
|
|
210 |
if use_dynamic_model: self.dynamic_unload(self.diffusion_model,
|
211 |
'diffusion_model',
|
212 |
skip_loaded=True)
|
213 |
+
if use_dynamic_model: self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
214 |
x_samples = self.decode_first_stage(latent)
|
215 |
if use_dynamic_model: self.dynamic_unload(self.first_stage_model,
|
216 |
'first_stage_model',
|
|
|
282 |
self.size_factor = cfg.get('SIZE_FACTOR', 8)
|
283 |
self.decoder_bias = cfg.get('DECODER_BIAS', 0)
|
284 |
self.default_n_prompt = cfg.get('DEFAULT_N_PROMPT', '')
|
285 |
+
self.dynamic_load(self.cond_stage_model, 'cond_stage_model')
|
286 |
+
self.dynamic_load(self.diffusion_model, 'diffusion_model')
|
287 |
+
self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
288 |
|
289 |
@torch.no_grad()
|
290 |
def encode_first_stage(self, x, **kwargs):
|
|
|
396 |
if use_ace and (not is_txt_image or refiner_scale <= 0):
|
397 |
ctx, null_ctx = {}, {}
|
398 |
# Get Noise Shape
|
399 |
+
if use_dynamic_model: self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
400 |
x = self.encode_first_stage(image)
|
401 |
if self.use_dynamic_model: self.dynamic_unload(self.first_stage_model,
|
402 |
'first_stage_model',
|
|
|
414 |
ctx['x_mask'] = null_ctx['x_mask'] = cond_mask
|
415 |
|
416 |
# Encode Prompt
|
417 |
+
if use_dynamic_model: self.dynamic_load(self.cond_stage_model, 'cond_stage_model')
|
418 |
function_name, dtype = self.get_function_info(self.cond_stage_model)
|
419 |
cont, cont_mask = getattr(get_model(self.cond_stage_model),
|
420 |
function_name)(prompt)
|
|
|
431 |
null_ctx['crossattn'] = null_cont
|
432 |
|
433 |
# Encode Edit Images
|
434 |
+
if use_dynamic_model: self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
435 |
edit_image = [to_device(i, strict=False) for i in edit_image]
|
436 |
edit_image_mask = [to_device(i, strict=False) for i in edit_image_mask]
|
437 |
e_img, e_mask = [], []
|
|
|
449 |
null_ctx['edit_mask'] = ctx['edit_mask'] = e_mask
|
450 |
|
451 |
# Diffusion Process
|
452 |
+
if use_dynamic_model: self.dynamic_load(self.diffusion_model, 'diffusion_model')
|
453 |
function_name, dtype = self.get_function_info(self.diffusion_model)
|
454 |
with torch.autocast('cuda',
|
455 |
enabled=dtype in ('float16', 'bfloat16'),
|
|
|
495 |
skip_loaded=False)
|
496 |
|
497 |
# Decode to Pixel Space
|
498 |
+
if use_dynamic_model: self.dynamic_load(self.first_stage_model, 'first_stage_model')
|
499 |
samples = unpack_tensor_into_imagelist(latent, x_shapes)
|
500 |
x_samples = self.decode_first_stage(samples)
|
501 |
if self.use_dynamic_model: self.dynamic_unload(self.first_stage_model,
|