|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import copy |
|
|
|
import cv2 |
|
import numpy as np |
|
import torch |
|
|
|
|
|
class BaseModel(object):
    """Thin wrapper that forwards device/dtype/mode switches to ``self.model``.

    This class never assigns ``self.model`` itself; subclasses are expected
    to set it before any of the forwarding methods are used. Every
    forwarding method returns the wrapper (not the wrapped model) so calls
    can be chained.
    """

    def cuda(self):
        """Call ``self.model.cuda()`` and return this wrapper."""
        wrapped = self.model
        wrapped.cuda()
        return self

    def cpu(self):
        """Call ``self.model.cpu()`` and return this wrapper."""
        wrapped = self.model
        wrapped.cpu()
        return self

    def float(self):
        """Call ``self.model.float()`` and return this wrapper."""
        wrapped = self.model
        wrapped.float()
        return self

    def to(self, device):
        """Call ``self.model.to(device)`` and return this wrapper."""
        wrapped = self.model
        wrapped.to(device)
        return self

    def eval(self):
        """Call ``self.model.eval()`` and return this wrapper."""
        wrapped = self.model
        wrapped.eval()
        return self

    def train(self):
        """Call ``self.model.train()`` and return this wrapper."""
        wrapped = self.model
        wrapped.train()
        return self

    def __call__(self, x):
        """Run the model on ``x``; subclasses must override this.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def __repr__(self):
        """Return a string showing the wrapped model."""
        return f"model: \n{self.model}"
|
|
|
|
|
def get_dtype_string(arr):
    """Map ``arr.dtype`` to the type-mode name used in this module.

    Args:
        arr: object exposing a numpy-comparable ``dtype`` attribute.

    Returns:
        ``"uint8"`` for ``np.uint8``, ``"float32"`` for ``np.float32``,
        ``"float"`` for ``np.float64``, and ``"unknow"`` for anything else.
    """
    # Checked in the same order as the names are listed, first match wins.
    names_by_dtype = (
        (np.uint8, "uint8"),
        (np.float32, "float32"),
        (np.float64, "float"),
    )
    for candidate, label in names_by_dtype:
        if arr.dtype == candidate:
            return label
    return "unknow"
|
|
|
|
|
class BaseSeg(BaseModel):
    """Subclass of ``BaseModel`` that adds nothing beyond an empty constructor."""

    def __init__(self):
        """Do nothing; in particular ``self.model`` is not assigned here."""
|
|
|
|
|
class Bbox:
    """Axis-aligned bounding box stored in one of two layouts.

    * ``mode="whwh"`` stores corners as ``[left, top, right, bottom]``.
    * ``mode="xywh"`` stores ``[center_x, center_y, width, height]``.

    Attributes are ``box`` (the four numbers, kept as passed in) and
    ``mode`` (one of the two layout names above).
    """

    def __init__(self, box, mode="whwh"):
        """Store ``box`` (four numbers) together with its layout ``mode``.

        Raises:
            AssertionError: if ``box`` does not have four entries or ``mode``
                is not ``"whwh"`` / ``"xywh"``.
        """
        assert len(box) == 4, "box must contain exactly 4 values"
        assert mode in ["whwh", "xywh"], "mode must be 'whwh' or 'xywh'"
        self.box = box
        self.mode = mode

    @staticmethod
    def _edges(center, size):
        """Return ``(low, high)`` edges of a 1-D span given center and size.

        When both values are integers the low edge is ``center - size // 2``
        so integer pixel boxes keep integer corners. Otherwise exact halves
        are used, which makes this the true inverse of ``to_xywh`` (whose
        centers are fractional for odd sizes). ``high - low`` always equals
        ``size``.
        """
        integer_types = (int, np.integer)
        if isinstance(center, integer_types) and isinstance(size, integer_types):
            low = center - size // 2
        else:
            low = center - size / 2
        return low, low + size

    def to_xywh(self):
        """Return this box in center/size layout.

        Returns ``self`` when already in ``"xywh"`` mode, otherwise a new
        ``Bbox``. Centers are computed with true division, so they may be
        fractional.
        """
        if self.mode == "whwh":
            l, t, r, b = self.box

            center_x = (l + r) / 2
            center_y = (t + b) / 2
            width = r - l
            height = b - t
            return Bbox([center_x, center_y, width, height], mode="xywh")
        else:
            return self

    def to_whwh(self):
        """Return this box in corner layout ``[left, top, right, bottom]``.

        Returns ``self`` when already in ``"whwh"`` mode, otherwise a new
        ``Bbox`` whose corners are derived with ``_edges``.
        """
        if self.mode == "whwh":
            return self
        else:
            cx, cy, w, h = self.box
            l, r = self._edges(cx, w)
            t, b = self._edges(cy, h)

            return Bbox([l, t, r, b], mode="whwh")

    def area(self):
        """Return ``width * height`` of the box."""
        box = self.to_xywh()
        _, __, w, h = box.box

        return w * h

    def get_box(self):
        """Return the four stored values converted with ``int`` (truncation)."""
        return list(map(int, self.box))

    def scale(self, scale, width, height):
        """Return a corner-layout box resized by ``scale`` about its center.

        The center and size are first truncated to integers, the size is
        multiplied by ``scale``, and the resulting corners are clamped to
        ``[0, width]`` horizontally and ``[0, height]`` vertically.

        Args:
            scale: multiplicative factor applied to width and height.
            width: upper clamp for the right edge (lower clamp is 0).
            height: upper clamp for the bottom edge (lower clamp is 0).

        Returns:
            A new ``Bbox`` in ``"whwh"`` mode with integer corners.
        """
        new_box = self.to_xywh()
        cx, cy, w, h = new_box.get_box()
        w = w * scale
        h = h * scale

        # Floor-half split keeps (right - left) == w even when w is odd.
        l = cx - w // 2
        t = cy - h // 2
        r = cx + w - (w // 2)
        b = cy + h - (h // 2)

        l = int(max(l, 0))
        t = int(max(t, 0))
        r = int(min(r, width))
        b = int(min(b, height))

        return Bbox([l, t, r, b], mode="whwh")

    def __repr__(self):
        """Return the box rendered as its four corner coordinates."""
        box = self.to_whwh()
        l, t, r, b = box.box

        return f"BBox(left={l}, top={t}, right={r}, bottom={b})"
|
|
|
|
|
class Image:
    """Image held as a numpy array plus its channel order and value type.

    Value types (``type_mode``):
        * ``"uint8"``   - ``np.uint8`` values in ``[0, 255]``
        * ``"float32"`` - ``np.float32`` values in ``[0, 1]``
        * ``"float"``   - ``np.float64`` values in ``[0, 1]``

    Attributes set by ``__init__``: ``data`` (the array), ``order`` (channel
    order name) and ``type_mode`` (one of the names above).
    """

    TYPE_ORDER = ["uint8", "float32", "float"]
    ORDER = ["RGB", "BGR"]
    MODE = ["numpy"]

    # numpy dtype backing each supported type_mode name.
    _DTYPES = {"uint8": np.uint8, "float32": np.float32, "float": np.float64}

    def __init__(self, input, order="RGB", type_mode="uint8"):
        """Build an image from a file path, another ``Image`` or an ndarray.

        Only 3-channel images are supported by the conversion methods.

        Args:
            input: path string, ``Image`` instance, or ``np.ndarray``
                (ndarrays are taken to be in RGB channel order).
            order: channel order the stored data should have.
            type_mode: value type the stored data should have.

        Raises:
            ValueError: for an unknown ``type_mode`` or unsupported dtype.
            NotImplementedError: for an unsupported ``input`` type.
            OSError: if a path cannot be read as an image.
        """
        if isinstance(input, str):
            data = self.read_image(input, type_mode, order)
            # read_image yields float32 for both float modes; make the stored
            # dtype agree with the type_mode label recorded below.
            if data.dtype != self._DTYPES[type_mode]:
                data = self._convert(data, type_mode)
            self.data = data
        else:
            self.data = self.get_image(input, type_mode, order)

        self.order = order
        self.type_mode = type_mode

    @staticmethod
    def _convert(data, type_mode):
        """Return a new C-contiguous array with ``data`` in ``type_mode``.

        uint8 -> float divides by 255; float -> uint8 multiplies by 255,
        rounds to nearest and clips to ``[0, 255]``; float <-> float only
        changes precision.

        Raises:
            ValueError: unknown ``type_mode`` or a source dtype that is
                neither uint8 nor floating point.
        """
        if type_mode not in Image._DTYPES:
            raise ValueError(f"Unknown type_mode {type_mode}")
        target = Image._DTYPES[type_mode]

        source_is_uint8 = data.dtype == np.uint8
        source_is_float = np.issubdtype(data.dtype, np.floating)
        if not (source_is_uint8 or source_is_float):
            raise ValueError(f"Unsupported image dtype {data.dtype}")

        if data.dtype == target:
            return data.copy()
        if target == np.uint8:
            # Round instead of truncating so k/255 maps back to exactly k.
            return np.clip(np.rint(data * 255.0), 0, 255).astype(np.uint8)
        if source_is_uint8:
            return (data / 255.0).astype(target)
        return data.astype(target)

    def get_image(self, input, type_mode, order):
        """Return array data for ``input`` in the requested type and order.

        Args:
            input: an ``Image`` or an ``np.ndarray`` (taken to be RGB).
            type_mode: requested value type.
            order: requested channel order.

        Raises:
            ValueError: if an ndarray has a dtype other than uint8, float32
                or float64.
            NotImplementedError: for any other ``input`` type.
        """
        if isinstance(input, Image):
            return input.to_numpy(type_mode, order)
        elif isinstance(input, np.ndarray):
            dtype_name = get_dtype_string(input)
            if dtype_name not in self.TYPE_ORDER:
                raise ValueError(f"Unsupported image dtype {input.dtype}")

            # Describe the raw array on self so to_numpy can convert it.
            self.data = input
            self.order = "RGB"
            self.type_mode = dtype_name

            return self.to_numpy(type_mode, order)
        else:
            raise NotImplementedError

    def to_numpy(self, type_mode="uint8", order="RGB"):
        """Return a copy of the image in the requested type and order.

        Both conversions are applied: the value type is converted first and
        the channel axis (last axis) is reversed when ``order`` differs from
        the stored order. The result never aliases ``self.data``.

        Raises:
            ValueError: unknown ``type_mode``, unsupported stored dtype, or
                an order change where either order is not in ``ORDER``.
        """
        data = self._convert(self.data, type_mode)

        if order != self.order:
            if order not in self.ORDER or self.order not in self.ORDER:
                raise ValueError(
                    f"Cannot convert channel order {self.order} to {order}"
                )
            # Materialise the reversed view: torch.from_numpy rejects
            # negative strides.
            data = np.ascontiguousarray(data[..., ::-1])

        return data

    def to_tensor(self, order):
        """Return the image as a float32 torch tensor in channel ``order``.

        The array layout is unchanged from ``to_numpy`` (no axis transpose).
        """
        data = self.to_numpy(type_mode="float32", order=order)
        return torch.from_numpy(data)

    def read_image(
        self,
        path,
        mode,
        order,
    ):
        """Read an image file into a numpy array.

        Args:
            path (str): path to the image file.
            mode (str): returned value format.
                ``"uint8"``: uint8 array, range [0, 255];
                ``"float"`` / ``"float32"``: float32 array, range [0, 1]
                (data that is already floating point is returned as is).
            order (str): channel order. ``"RGB"`` / ``"RGBA"`` convert from
                OpenCV's BGR(A); any other value keeps the decoded order.

        Note:
            A 4-channel image is composited onto a white background and
            reduced to 3 channels unless ``order`` contains ``"A"``.

        Returns:
            np.ndarray: the image array.

        Raises:
            ValueError: if ``mode`` is not one of the supported names.
            OSError: if OpenCV cannot read ``path``.
        """
        # Validate before touching the file. "pil" used to dispatch to
        # Image.open, which does not exist on this class.
        if mode not in self._DTYPES:
            raise ValueError(f"Unknown read_image mode {mode}")

        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise OSError(f"Failed to read image file: {path}")

        if len(img.shape) == 3:
            if order in ["RGB", "RGBA"]:
                if img.shape[-1] == 4:
                    img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA)
                elif img.shape[-1] == 3:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            if img.shape[-1] == 4 and "A" not in order:
                # Alpha-blend onto white: rgb * a + 1 * (1 - a).
                img = img.astype(np.float32) / 255
                img = img[..., :3] * img[..., 3:] + (1 - img[..., 3:])

        if mode == "uint8":
            if img.dtype != np.uint8:
                # Round so values that went through /255 come back exactly.
                img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)
        else:
            if img.dtype == np.uint8:
                img = img.astype(np.float32) / 255

        return img
|
|