import os
import math
import pickle
import random
import numpy as np
import glob
import torch
import cv2


####################
# Files & IO
####################

###################### get image path list ######################
IMG_EXTENSIONS = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP']


def is_image_file(filename):
    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)


def _get_paths_from_images(path):
    '''get image path list from image folder'''
    assert os.path.isdir(path), '{:s} is not a valid directory'.format(path)
    images = []
    for dirpath, _, fnames in sorted(os.walk(path)):
        for fname in sorted(fnames):
            if is_image_file(fname):
                img_path = os.path.join(dirpath, fname)
                images.append(img_path)
    assert images, '{:s} has no valid image file'.format(path)
    return images


def _get_paths_from_lmdb(dataroot):
    '''get image path list from lmdb meta info'''
    meta_info = pickle.load(open(os.path.join(dataroot, 'meta_info.pkl'), 'rb'))
    paths = meta_info['keys']
    sizes = meta_info['resolution']
    if len(sizes) == 1:
        # a single resolution entry means all images share it; broadcast it to every key
        sizes = sizes * len(paths)
    return paths, sizes


def get_image_paths(data_type, dataroot):
    '''get image path list
    supports lmdb or image files'''
    paths, sizes = None, None
    if dataroot is not None:
        if data_type == 'lmdb':
            paths, sizes = _get_paths_from_lmdb(dataroot)
        elif data_type == 'img':
            paths = sorted(_get_paths_from_images(dataroot))
        else:
            raise NotImplementedError('data_type [{:s}] is not recognized.'.format(data_type))
    return paths, sizes


def glob_file_list(root):
    return sorted(glob.glob(os.path.join(root, '*')))
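

# Usage sketch (illustrative paths): list image files from a plain folder,
#   paths, _ = get_image_paths('img', 'datasets/DIV2K/train_HR')
# or read the keys and sizes recorded in an lmdb's meta_info.pkl:
#   paths, sizes = get_image_paths('lmdb', 'datasets/DIV2K/train_HR.lmdb')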


###################### read images ######################
def _read_img_lmdb(env, key, size):
    '''read image from lmdb with key (w/ and w/o fixed size)
    size: (C, H, W) tuple'''
    with env.begin(write=False) as txn:
        buf = txn.get(key.encode('ascii'))
    img_flat = np.frombuffer(buf, dtype=np.uint8)
    C, H, W = size
    img = img_flat.reshape(H, W, C)
    return img


def read_img(env, path, size=None):
    '''read image by cv2 or from lmdb
    return: numpy float32, HWC, BGR, [0, 1]'''
    if env is None:  # read from an image file
        img = cv2.imread(path, cv2.IMREAD_COLOR)
    else:
        img = _read_img_lmdb(env, path, size)
    img = img.astype(np.float32) / 255.
    if img.ndim == 2:
        img = np.expand_dims(img, axis=2)
    # some images have 4 channels; keep only the first 3 (BGR)
    if img.shape[2] > 3:
        img = img[:, :, :3]
    return img


def read_img_seq(path):
    """Read a sequence of images from a given folder path.
    Args:
        path (list/str): list of image paths / image folder path
    Returns:
        imgs (Tensor): size (T, C, H, W), RGB, [0, 1]
    """
    if type(path) is list:
        img_path_l = path
    else:
        img_path_l = sorted(glob.glob(os.path.join(path, '*.png')))
    img_l = [read_img(None, v) for v in img_path_l]
    # BGR to RGB, then stack to a Torch tensor in (T, C, H, W) order
    imgs = np.stack(img_l, axis=0)
    imgs = imgs[:, :, :, [2, 1, 0]]
    imgs = torch.from_numpy(np.ascontiguousarray(np.transpose(imgs, (0, 3, 1, 2)))).float()
    return imgs
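

# Usage sketch: load all PNG frames of one clip into a (T, C, H, W) RGB tensor
# in [0, 1]; the folder name below is illustrative only.
#   imgs = read_img_seq('datasets/REDS4/000')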


def index_generation(crt_i, max_n, N, padding='reflection'):
    """Generate an index list for reading N frames from a sequence of images.
    Args:
        crt_i (int): current center index
        max_n (int): total number of images in the sequence (counting from 1)
        N (int): number of frames to read
        padding (str): padding mode, one of replicate | reflection | new_info | circle
    Example: crt_i = 0, N = 5
        replicate: [0, 0, 0, 1, 2]
        reflection: [2, 1, 0, 1, 2]
        new_info: [4, 3, 0, 1, 2]
        circle: [3, 4, 0, 1, 2]
    Returns:
        return_l (list [int]): a list of indexes
    """
    max_n = max_n - 1  # convert to the largest zero-based index
    n_pad = N // 2
    return_l = []
    for i in range(crt_i - n_pad, crt_i + n_pad + 1):
        if i < 0:
            if padding == 'replicate':
                add_idx = 0
            elif padding == 'reflection':
                add_idx = -i
            elif padding == 'new_info':
                add_idx = (crt_i + n_pad) + (-i)
            elif padding == 'circle':
                add_idx = N + i
            else:
                raise ValueError('Wrong padding mode')
        elif i > max_n:
            if padding == 'replicate':
                add_idx = max_n
            elif padding == 'reflection':
                add_idx = max_n * 2 - i
            elif padding == 'new_info':
                add_idx = (crt_i - n_pad) - (i - max_n)
            elif padding == 'circle':
                add_idx = i - N
            else:
                raise ValueError('Wrong padding mode')
        else:
            add_idx = i
        return_l.append(add_idx)
    return return_l
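

# Boundary-behavior sketch for a hypothetical 7-frame sequence (indices 0..6)
# read with a 5-frame window:
#   index_generation(0, 7, 5, padding='replicate')   # -> [0, 0, 0, 1, 2]
#   index_generation(0, 7, 5, padding='reflection')  # -> [2, 1, 0, 1, 2]
#   index_generation(6, 7, 5, padding='reflection')  # -> [4, 5, 6, 5, 4]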


####################
# image processing
# process on numpy image
####################
def augment(img_list, hflip=True, rot=True):
    # horizontal flip and/or rotate; the random flags are drawn once per call,
    # so every image in img_list receives the same augmentation
    hflip = hflip and random.random() < 0.5
    vflip = rot and random.random() < 0.5
    rot90 = rot and random.random() < 0.5

    def _augment(img):
        if hflip:
            img = img[:, ::-1, :]
        if vflip:
            img = img[::-1, :, :]
        if rot90:
            img = img.transpose(1, 0, 2)
        return img

    return [_augment(img) for img in img_list]
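

# Usage sketch: augment an LQ/GT pair consistently by passing both images in a
# single call; `img_LQ` and `img_GT` are illustrative names.
#   img_LQ, img_GT = augment([img_LQ, img_GT], hflip=True, rot=True)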


def augment_flow(img_list, flow_list, hflip=True, rot=True):
    # horizontal flip and/or rotate images together with their optical flows;
    # the same random flags are applied to both lists
    hflip = hflip and random.random() < 0.5
    vflip = rot and random.random() < 0.5
    rot90 = rot and random.random() < 0.5

    def _augment(img):
        if hflip:
            img = img[:, ::-1, :]
        if vflip:
            img = img[::-1, :, :]
        if rot90:
            img = img.transpose(1, 0, 2)
        return img

    def _augment_flow(flow):
        if hflip:
            flow = flow[:, ::-1, :]
            flow[:, :, 0] *= -1  # horizontal flip negates the x component
        if vflip:
            flow = flow[::-1, :, :]
            flow[:, :, 1] *= -1  # vertical flip negates the y component
        if rot90:
            flow = flow.transpose(1, 0, 2)
            flow = flow[:, :, [1, 0]]  # transposing swaps the x and y components
        return flow

    rlt_img_list = [_augment(img) for img in img_list]
    rlt_flow_list = [_augment_flow(flow) for flow in flow_list]
    return rlt_img_list, rlt_flow_list


def channel_convert(in_c, tar_type, img_list):
    # conversion among BGR, gray and y
    if in_c == 3 and tar_type == 'gray':  # BGR to gray
        gray_list = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in img_list]
        return [np.expand_dims(img, axis=2) for img in gray_list]
    elif in_c == 3 and tar_type == 'y':  # BGR to y
        y_list = [bgr2ycbcr(img, only_y=True) for img in img_list]
        return [np.expand_dims(img, axis=2) for img in y_list]
    elif in_c == 1 and tar_type == 'RGB':  # gray/y to 3 channels (BGR order)
        return [cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) for img in img_list]
    else:
        return img_list
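

# Usage sketch: extract the Y channel from a list of BGR images, e.g. before
# computing Y-channel PSNR/SSIM; `img1` and `img2` are illustrative names.
#   y_list = channel_convert(3, 'y', [img1, img2])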


def rgb2ycbcr(img, only_y=True):
    '''same as matlab rgb2ycbcr
    only_y: only return Y channel
    Input:
        uint8, [0, 255]
        float, [0, 1]
    '''
    in_img_type = img.dtype
    img = img.astype(np.float32)  # work on a float copy; do not modify the input in place
    if in_img_type != np.uint8:
        img *= 255.
    # convert
    if only_y:
        rlt = np.dot(img, [65.481, 128.553, 24.966]) / 255.0 + 16.0
    else:
        rlt = np.matmul(img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
                              [24.966, 112.0, -18.214]]) / 255.0 + [16, 128, 128]
    if in_img_type == np.uint8:
        rlt = rlt.round()
    else:
        rlt /= 255.
    return rlt.astype(in_img_type)


def bgr2ycbcr(img, only_y=True):
    '''bgr version of rgb2ycbcr
    only_y: only return Y channel
    Input:
        uint8, [0, 255]
        float, [0, 1]
    '''
    in_img_type = img.dtype
    img = img.astype(np.float32)  # work on a float copy; do not modify the input in place
    if in_img_type != np.uint8:
        img *= 255.
    # convert
    if only_y:
        rlt = np.dot(img, [24.966, 128.553, 65.481]) / 255.0 + 16.0
    else:
        rlt = np.matmul(img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
                              [65.481, -37.797, 112.0]]) / 255.0 + [16, 128, 128]
    if in_img_type == np.uint8:
        rlt = rlt.round()
    else:
        rlt /= 255.
    return rlt.astype(in_img_type)


def ycbcr2rgb(img):
    '''same as matlab ycbcr2rgb
    Input:
        uint8, [0, 255]
        float, [0, 1]
    '''
    in_img_type = img.dtype
    img = img.astype(np.float32)  # work on a float copy; do not modify the input in place
    if in_img_type != np.uint8:
        img *= 255.
    # convert
    rlt = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], [0, -0.00153632, 0.00791071],
                          [0.00625893, -0.00318811, 0]]) * 255.0 + [-222.921, 135.576, -276.836]
    if in_img_type == np.uint8:
        rlt = rlt.round()
    else:
        rlt /= 255.
    return rlt.astype(in_img_type)
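

# Sanity-check sketch: for a float image in [0, 1], ycbcr2rgb should
# approximately invert rgb2ycbcr(..., only_y=False), up to coefficient rounding.
#   rgb = np.random.rand(8, 8, 3).astype(np.float32)
#   assert np.allclose(ycbcr2rgb(rgb2ycbcr(rgb, only_y=False)), rgb, atol=1e-4)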


def modcrop(img_in, scale):
    # img_in: Numpy, HWC or HW
    img = np.copy(img_in)
    if img.ndim == 2:
        H, W = img.shape
        H_r, W_r = H % scale, W % scale
        img = img[:H - H_r, :W - W_r]
    elif img.ndim == 3:
        H, W, C = img.shape
        H_r, W_r = H % scale, W % scale
        img = img[:H - H_r, :W - W_r, :]
    else:
        raise ValueError('Wrong img ndim: [{:d}].'.format(img.ndim))
    return img
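

# Example sketch: crop so both sides are divisible by the scale factor, e.g. a
# (123, 77, 3) array becomes (120, 76, 3) for scale = 4.
#   modcrop(np.zeros((123, 77, 3)), 4).shape  # -> (120, 76, 3)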


####################
# Functions
####################
# MATLAB-compatible 'imresize'; currently only the 'bicubic' kernel is supported
def cubic(x):
    # bicubic convolution kernel (Keys 1981, a = -0.5), as used by MATLAB imresize
    absx = torch.abs(x)
    absx2 = absx**2
    absx3 = absx**3
    return (1.5 * absx3 - 2.5 * absx2 + 1) * (
        (absx <= 1).type_as(absx)) + (-0.5 * absx3 + 2.5 * absx2 - 4 * absx + 2) * ((
            (absx > 1) * (absx <= 2)).type_as(absx))


def calculate_weights_indices(in_length, out_length, scale, kernel, kernel_width, antialiasing):
    if (scale < 1) and antialiasing:
        # Use a modified kernel (larger kernel width) to simultaneously
        # interpolate and antialias when downscaling.
        kernel_width = kernel_width / scale

    # Output-space coordinates
    x = torch.linspace(1, out_length, out_length)

    # Input-space coordinates. Calculate the inverse mapping such that 0.5
    # in output space maps to 0.5 in input space, and 0.5 + scale in output
    # space maps to 1.5 in input space.
    u = x / scale + 0.5 * (1 - 1 / scale)

    # What is the left-most pixel that can be involved in the computation?
    left = torch.floor(u - kernel_width / 2)

    # What is the maximum number of pixels that can be involved in the
    # computation? Note: it's OK to use an extra pixel here; if the
    # corresponding weights are all zero, it will be eliminated at the end
    # of this function.
    P = math.ceil(kernel_width) + 2

    # The indices of the input pixels involved in computing the k-th output
    # pixel are in row k of the indices matrix.
    indices = left.view(out_length, 1).expand(out_length, P) + torch.linspace(0, P - 1, P).view(
        1, P).expand(out_length, P)

    # The weights used to compute the k-th output pixel are in row k of the
    # weights matrix.
    distance_to_center = u.view(out_length, 1).expand(out_length, P) - indices

    # apply cubic kernel
    if (scale < 1) and antialiasing:
        weights = scale * cubic(distance_to_center * scale)
    else:
        weights = cubic(distance_to_center)

    # Normalize the weights matrix so that each row sums to 1.
    weights_sum = torch.sum(weights, 1).view(out_length, 1)
    weights = weights / weights_sum.expand(out_length, P)

    # If a column in weights is all zero, get rid of it; only the first and
    # last columns need to be considered.
    weights_zero_tmp = torch.sum((weights == 0), 0)
    if not math.isclose(weights_zero_tmp[0], 0, rel_tol=1e-6):
        indices = indices.narrow(1, 1, P - 2)
        weights = weights.narrow(1, 1, P - 2)
    if not math.isclose(weights_zero_tmp[-1], 0, rel_tol=1e-6):
        indices = indices.narrow(1, 0, P - 2)
        weights = weights.narrow(1, 0, P - 2)
    weights = weights.contiguous()
    indices = indices.contiguous()

    # Out-of-range indices are handled by symmetric padding in the caller;
    # record how much padding each end needs and shift indices into the
    # padded coordinate system.
    sym_len_s = -indices.min() + 1
    sym_len_e = indices.max() - in_length
    indices = indices + sym_len_s - 1
    return weights, indices, int(sym_len_s), int(sym_len_e)


def imresize(img, scale, antialiasing=True):
    # The scale is the same for H and W.
    # input: img: CHW RGB [0,1]
    # output: CHW RGB [0,1] w/o round
    in_C, in_H, in_W = img.size()
    out_H, out_W = math.ceil(in_H * scale), math.ceil(in_W * scale)
    kernel_width = 4
    kernel = 'cubic'

    # (MATLAB resizes along the dimension with the smaller scale factor first;
    # that reordering is not supported here.)

    # get weights and indices
    weights_H, indices_H, sym_len_Hs, sym_len_He = calculate_weights_indices(
        in_H, out_H, scale, kernel, kernel_width, antialiasing)
    weights_W, indices_W, sym_len_Ws, sym_len_We = calculate_weights_indices(
        in_W, out_W, scale, kernel, kernel_width, antialiasing)

    # process H dimension: symmetric copying (reflect-pad the borders)
    img_aug = torch.FloatTensor(in_C, in_H + sym_len_Hs + sym_len_He, in_W)
    img_aug.narrow(1, sym_len_Hs, in_H).copy_(img)

    sym_patch = img[:, :sym_len_Hs, :]
    inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
    sym_patch_inv = sym_patch.index_select(1, inv_idx)
    img_aug.narrow(1, 0, sym_len_Hs).copy_(sym_patch_inv)

    sym_patch = img[:, -sym_len_He:, :]
    inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
    sym_patch_inv = sym_patch.index_select(1, inv_idx)
    img_aug.narrow(1, sym_len_Hs + in_H, sym_len_He).copy_(sym_patch_inv)

    out_1 = torch.FloatTensor(in_C, out_H, in_W)
    kernel_width = weights_H.size(1)
    for i in range(out_H):
        idx = int(indices_H[i][0])
        out_1[0, i, :] = img_aug[0, idx:idx + kernel_width, :].transpose(0, 1).mv(weights_H[i])
        out_1[1, i, :] = img_aug[1, idx:idx + kernel_width, :].transpose(0, 1).mv(weights_H[i])
        out_1[2, i, :] = img_aug[2, idx:idx + kernel_width, :].transpose(0, 1).mv(weights_H[i])

    # process W dimension: symmetric copying
    out_1_aug = torch.FloatTensor(in_C, out_H, in_W + sym_len_Ws + sym_len_We)
    out_1_aug.narrow(2, sym_len_Ws, in_W).copy_(out_1)

    sym_patch = out_1[:, :, :sym_len_Ws]
    inv_idx = torch.arange(sym_patch.size(2) - 1, -1, -1).long()
    sym_patch_inv = sym_patch.index_select(2, inv_idx)
    out_1_aug.narrow(2, 0, sym_len_Ws).copy_(sym_patch_inv)

    sym_patch = out_1[:, :, -sym_len_We:]
    inv_idx = torch.arange(sym_patch.size(2) - 1, -1, -1).long()
    sym_patch_inv = sym_patch.index_select(2, inv_idx)
    out_1_aug.narrow(2, sym_len_Ws + in_W, sym_len_We).copy_(sym_patch_inv)

    out_2 = torch.FloatTensor(in_C, out_H, out_W)
    kernel_width = weights_W.size(1)
    for i in range(out_W):
        idx = int(indices_W[i][0])
        out_2[0, :, i] = out_1_aug[0, :, idx:idx + kernel_width].mv(weights_W[i])
        out_2[1, :, i] = out_1_aug[1, :, idx:idx + kernel_width].mv(weights_W[i])
        out_2[2, :, i] = out_1_aug[2, :, idx:idx + kernel_width].mv(weights_W[i])
    return out_2


def imresize_np(img, scale, antialiasing=True):
    # The scale is the same for H and W.
    # input: img: Numpy, HWC BGR [0,1]
    # output: HWC BGR [0,1] w/o round
    img = torch.from_numpy(img)
    in_H, in_W, in_C = img.size()
    out_H, out_W = math.ceil(in_H * scale), math.ceil(in_W * scale)
    kernel_width = 4
    kernel = 'cubic'

    # (MATLAB resizes along the dimension with the smaller scale factor first;
    # that reordering is not supported here.)

    # get weights and indices
    weights_H, indices_H, sym_len_Hs, sym_len_He = calculate_weights_indices(
        in_H, out_H, scale, kernel, kernel_width, antialiasing)
    weights_W, indices_W, sym_len_Ws, sym_len_We = calculate_weights_indices(
        in_W, out_W, scale, kernel, kernel_width, antialiasing)

    # process H dimension: symmetric copying (reflect-pad the borders)
    img_aug = torch.FloatTensor(in_H + sym_len_Hs + sym_len_He, in_W, in_C)
    img_aug.narrow(0, sym_len_Hs, in_H).copy_(img)

    sym_patch = img[:sym_len_Hs, :, :]
    inv_idx = torch.arange(sym_patch.size(0) - 1, -1, -1).long()
    sym_patch_inv = sym_patch.index_select(0, inv_idx)
    img_aug.narrow(0, 0, sym_len_Hs).copy_(sym_patch_inv)

    sym_patch = img[-sym_len_He:, :, :]
    inv_idx = torch.arange(sym_patch.size(0) - 1, -1, -1).long()
    sym_patch_inv = sym_patch.index_select(0, inv_idx)
    img_aug.narrow(0, sym_len_Hs + in_H, sym_len_He).copy_(sym_patch_inv)

    out_1 = torch.FloatTensor(out_H, in_W, in_C)
    kernel_width = weights_H.size(1)
    for i in range(out_H):
        idx = int(indices_H[i][0])
        out_1[i, :, 0] = img_aug[idx:idx + kernel_width, :, 0].transpose(0, 1).mv(weights_H[i])
        out_1[i, :, 1] = img_aug[idx:idx + kernel_width, :, 1].transpose(0, 1).mv(weights_H[i])
        out_1[i, :, 2] = img_aug[idx:idx + kernel_width, :, 2].transpose(0, 1).mv(weights_H[i])

    # process W dimension: symmetric copying
    out_1_aug = torch.FloatTensor(out_H, in_W + sym_len_Ws + sym_len_We, in_C)
    out_1_aug.narrow(1, sym_len_Ws, in_W).copy_(out_1)

    sym_patch = out_1[:, :sym_len_Ws, :]
    inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
    sym_patch_inv = sym_patch.index_select(1, inv_idx)
    out_1_aug.narrow(1, 0, sym_len_Ws).copy_(sym_patch_inv)

    sym_patch = out_1[:, -sym_len_We:, :]
    inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
    sym_patch_inv = sym_patch.index_select(1, inv_idx)
    out_1_aug.narrow(1, sym_len_Ws + in_W, sym_len_We).copy_(sym_patch_inv)

    out_2 = torch.FloatTensor(out_H, out_W, in_C)
    kernel_width = weights_W.size(1)
    for i in range(out_W):
        idx = int(indices_W[i][0])
        out_2[:, i, 0] = out_1_aug[:, idx:idx + kernel_width, 0].mv(weights_W[i])
        out_2[:, i, 1] = out_1_aug[:, idx:idx + kernel_width, 1].mv(weights_W[i])
        out_2[:, i, 2] = out_1_aug[:, idx:idx + kernel_width, 2].mv(weights_W[i])
    return out_2.numpy()
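

# Usage sketch: MATLAB-style antialiased 4x bicubic downsampling of an HWC BGR
# float image in [0, 1]; `img` is an illustrative name.
#   lr = imresize_np(img, 1 / 4, antialiasing=True)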


if __name__ == '__main__':
    # test the imresize function
    # read image
    img = cv2.imread('test.png')
    img = img * 1.0 / 255
    img = torch.from_numpy(np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))).float()
    # imresize
    scale = 1 / 4
    import time
    total_time = 0
    for i in range(10):
        start_time = time.time()
        rlt = imresize(img, scale, antialiasing=True)
        use_time = time.time() - start_time
        total_time += use_time
    print('average time: {}'.format(total_time / 10))

    import torchvision.utils
    torchvision.utils.save_image((rlt * 255).round() / 255, 'rlt.png', nrow=1, padding=0,
                                 normalize=False)