YulianSa committed
Commit 8d53de2 · 1 Parent(s): 0aa1fe9
Files changed (1)
  1. infer_api.py +27 -31
infer_api.py CHANGED
@@ -108,34 +108,32 @@ def set_seed(seed):
     torch.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
 
-class BkgRemover:
-    def __init__(self, force_cpu: Optional[bool] = True):
-        session_infer_path = hf_hub_download(
-            repo_id="skytnt/anime-seg", filename="isnetis.onnx",
-        )
-        providers: list[str] = ["CPUExecutionProvider"]
-        if not force_cpu and "CUDAExecutionProvider" in rt.get_available_providers():
-            providers = ["CUDAExecutionProvider"]
-
-        self.session_infer = rt.InferenceSession(
-            session_infer_path, providers=providers,
-        )
-
-    @spaces.GPU
-    def remove_background(
-        self,
-        img: np.ndarray,
-        alpha_min: float,
-        alpha_max: float,
-    ) -> list:
-        img = np.array(img)
-        mask = get_mask(self.session_infer, img)
-        mask[mask < alpha_min] = 0.0
-        mask[mask > alpha_max] = 1.0
-        img_after = (mask * img).astype(np.uint8)
-        mask = (mask * SCALE).astype(np.uint8)
-        img_after = np.concatenate([img_after, mask], axis=2, dtype=np.uint8)
-        return Image.fromarray(img_after)
+
+session_infer_path = hf_hub_download(
+    repo_id="skytnt/anime-seg", filename="isnetis.onnx",
+)
+providers: list[str] = ["CPUExecutionProvider"]
+if "CUDAExecutionProvider" in rt.get_available_providers():
+    providers = ["CUDAExecutionProvider"]
+
+bkg_remover_session_infer = rt.InferenceSession(
+    session_infer_path, providers=providers,
+)
+
+@spaces.GPU
+def remove_background(
+    img: np.ndarray,
+    alpha_min: float,
+    alpha_max: float,
+) -> list:
+    img = np.array(img)
+    mask = get_mask(bkg_remover_session_infer, img)
+    mask[mask < alpha_min] = 0.0
+    mask[mask > alpha_max] = 1.0
+    img_after = (mask * img).astype(np.uint8)
+    mask = (mask * SCALE).astype(np.uint8)
+    img_after = np.concatenate([img_after, mask], axis=2, dtype=np.uint8)
+    return Image.fromarray(img_after)
 
 
 def process_image(image, totensor, width, height):
@@ -168,7 +166,7 @@ def process_image(image, totensor, width, height):
 
 @spaces.GPU
 @torch.no_grad()
-def inference(validation_pipeline, bkg_remover, input_image, vae, feature_extractor, image_encoder, unet, ref_unet, tokenizer,
+def inference(validation_pipeline, input_image, vae, feature_extractor, image_encoder, unet, ref_unet, tokenizer,
              text_encoder, pretrained_model_path, validation, val_width, val_height, unet_condition_type,
              use_noise=True, noise_d=256, crop=False, seed=100, timestep=20):
     set_seed(seed)
@@ -186,7 +184,7 @@ def inference(validation_pipeline, bkg_remover, input_image, vae, feature_extrac
     B = 1
     if input_image.mode != "RGBA":
         # remove background
-        input_image = bkg_remover.remove_background(input_image, 0.1, 0.9)
+        input_image = remove_background(input_image, 0.1, 0.9)
     imgs_in = process_image(input_image, totensor, val_width, val_height)
     imgs_in = rearrange(imgs_in.unsqueeze(0).unsqueeze(0), "B Nv C H W -> (B Nv) C H W")
 
@@ -869,11 +867,9 @@ class InferCanonicalAPI:
         )
         self.validation_pipeline.set_progress_bar_config(disable=True)
 
-        self.bkg_remover = BkgRemover()
-
     def canonicalize(self, image, seed):
         return inference(
-            self.validation_pipeline, self.bkg_remover, image, self.vae, self.feature_extractor, self.image_encoder, self.unet, self.ref_unet, self.tokenizer, self.text_encoder,
+            self.validation_pipeline, image, self.vae, self.feature_extractor, self.image_encoder, self.unet, self.ref_unet, self.tokenizer, self.text_encoder,
            self.pretrained_model_path, self.validation, self.width_input, self.height_input, self.unet_condition_type,
            use_noise=self.use_noise, noise_d=self.noise_d, crop=True, seed=seed, timestep=self.timestep
        )
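For context, a minimal sketch (not part of the commit) of how the refactored, module-level remove_background can be exercised on its own. It assumes infer_api.py and its dependencies (including the spaces package) are importable, that get_mask and SCALE are defined elsewhere in the module as the diff implies, and the input file name is made up; inference() itself calls it with the same 0.1 / 0.9 thresholds.

# Illustrative usage only; names of files are assumptions.
from PIL import Image

import infer_api  # downloads isnetis.onnx and builds the ONNX session at import time

rgb = Image.open("character.png").convert("RGB")  # any non-RGBA input

# Returns a PIL RGBA image: thresholded mask applied to the pixels, mask kept as alpha
rgba = infer_api.remove_background(rgb, alpha_min=0.1, alpha_max=0.9)
rgba.save("character_rgba.png")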
 
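One behavioural difference worth noting: the removed BkgRemover defaulted to force_cpu=True, so segmentation stayed on CPUExecutionProvider unless a caller opted out, while the module-level code now picks CUDAExecutionProvider whenever onnxruntime reports it, and does so at import time rather than when a remover object is constructed. A small sketch of the old opt-in pattern, should CPU-only inference need to be restored; the FORCE_CPU name is hypothetical.

import onnxruntime as rt

FORCE_CPU = True  # hypothetical switch; the commit removes the old force_cpu option

providers = ["CPUExecutionProvider"]
if not FORCE_CPU and "CUDAExecutionProvider" in rt.get_available_providers():
    providers = ["CUDAExecutionProvider"]
# pass `providers` to rt.InferenceSession(...) as in the diff above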