shunk031 committed on
Commit
3f3c4cd
1 Parent(s): 0c6516e

Upload processor

Browse files
image_processing_isnet.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from PIL import Image
6
+ from PIL.Image import Image as PilImage
7
+ from torchvision import transforms
8
+ from torchvision.transforms.functional import normalize
9
+ from transformers.image_processing_base import BatchFeature
10
+ from transformers.image_processing_utils import BaseImageProcessor
11
+ from transformers.image_utils import ImageInput
12
+
13
+
14
def apply_transform(data):
    """Convert ``data`` to a tensor using torchvision's ``ToTensor`` transform.

    A fresh ``transforms.ToTensor()`` instance is created on every call and
    applied to ``data``; whatever that transform returns is handed back
    unchanged.
    """
    return transforms.ToTensor()(data)
17
+
18
+
19
class ISNetImageProcessor(BaseImageProcessor):
    """Image processor that prepares PIL images for ISNet and renders its masks.

    ``preprocess`` turns a single PIL image into a normalized, resized
    ``pixel_values`` batch; ``postprocess`` turns a raw prediction tensor back
    into a PIL image of the requested size.
    """

    def __init__(self, model_in_size: Tuple[int, int] = (1024, 1024), **kwargs) -> None:
        """Store the spatial size that inputs are resized to.

        Args:
            model_in_size: Target size passed to ``F.interpolate`` as ``size``.
            **kwargs: Forwarded to ``BaseImageProcessor.__init__``.
        """
        super().__init__(**kwargs)
        self.model_in_size = model_in_size

    def preprocess(self, images: ImageInput, **kwargs) -> BatchFeature:
        """Convert one PIL image into a model-ready ``pixel_values`` tensor.

        Args:
            images: A single PIL image. Any mode is accepted; non-RGB images
                are converted to RGB first.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry has shape
            ``(1, 3, *self.model_in_size)``.

        Raises:
            ValueError: If ``images`` is not a PIL image.
        """
        if not isinstance(images, PilImage):
            raise ValueError(f"Expected PIL Image, got {type(images)}")

        image_pil = images
        # The normalization below uses a 3-channel mean/std, so grayscale,
        # palette or RGBA inputs would fail with a broadcasting error unless
        # they are brought to exactly three channels first.
        if image_pil.mode != "RGB":
            image_pil = image_pil.convert("RGB")

        image_tensor = apply_transform(image_pil)

        # shape: (3, h, w) -> (1, 3, h, w)
        image_tensor = image_tensor.unsqueeze(dim=0)

        image_tensor = F.interpolate(
            image_tensor, size=self.model_in_size, mode="bilinear", align_corners=False
        )
        # Shift each channel by 0.5; std of 1.0 leaves the scale untouched.
        image_tensor = normalize(
            image_tensor, mean=[0.5, 0.5, 0.5], std=[1.0, 1.0, 1.0]
        )
        return BatchFeature(data={"pixel_values": image_tensor}, tensor_type="pt")

    def postprocess(
        self, prediction: torch.Tensor, width: int, height: int, **kwargs
    ) -> PilImage:
        """Render a prediction tensor as an RGB PIL image of ``(width, height)``.

        The prediction is min-max normalized to ``[0, 1]``, squeezed, scaled
        to ``[0, 255]`` with rounding, converted to an RGB image and resized
        with bilinear resampling.

        Args:
            prediction: Raw model output; after ``squeeze()`` it is expected to
                be a 2-D map (assumption based on the ``Image.fromarray`` call).
            width: Output image width in pixels.
            height: Output image height in pixels.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            The rendered mask as an RGB PIL image.
        """

        def _norm_prediction(d: torch.Tensor) -> torch.Tensor:
            """Min-max normalize ``d``, guarding against a zero range."""
            ma, mi = torch.max(d), torch.min(d)

            # division while avoiding zero division
            return (d - mi) / ((ma - mi) + torch.finfo(torch.float32).eps)

        # Detach so outputs produced outside ``torch.no_grad()`` can still be
        # converted to NumPy without raising.
        prediction = _norm_prediction(prediction.detach())
        prediction = prediction.squeeze()
        # Adding 0.5 before the truncating uint8 cast rounds to nearest.
        prediction = (prediction * 255 + 0.5).clamp(0, 255)

        # Quantize explicitly instead of handing a float array to Pillow and
        # relying on its implicit float -> 8-bit conversion.
        prediction_np = prediction.to(torch.uint8).cpu().numpy()
        image = Image.fromarray(prediction_np).convert("RGB")
        image = image.resize((width, height), resample=Image.Resampling.BILINEAR)
        return image
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoImageProcessor": "image_processing_isnet.ISNetImageProcessor"
4
+ },
5
+ "image_processor_type": "ISNetImageProcessor",
6
+ "model_in_size": [
7
+ 1024,
8
+ 1024
9
+ ]
10
+ }