# NOTE: removed stray paste artifacts ("Spaces:", "Runtime error") that were not Python code.
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
"""
Data augmentation functionality. Passed as callable transformations to
Dataset classes.
The data augmentation procedures were interpreted from @weiliu89's SSD paper
http://arxiv.org/abs/1512.02325
"""
import math
import random
import cv2
import numpy as np
from yolox.utils import xyxy2cxcywh
def augment_hsv(img, hgain=5, sgain=30, vgain=30):
    """Randomly jitter hue/saturation/value of a BGR uint8 image, in place.

    One signed gain per channel is drawn from [-gain, +gain]; each gain is
    then independently enabled or zeroed with probability 0.5. The image is
    modified in place (no return value).
    """
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain]
    gains = (gains * np.random.randint(0, 2, 3)).astype(np.int16)

    # Work in int16 so additions cannot wrap around in uint8.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)
    hsv[..., 0] = (hsv[..., 0] + gains[0]) % 180              # OpenCV hue range is [0, 180)
    hsv[..., 1] = np.clip(hsv[..., 1] + gains[1], 0, 255)     # saturation saturates
    hsv[..., 2] = np.clip(hsv[..., 2] + gains[2], 0, 255)     # value saturates
    # Convert back and write directly into the caller's buffer.
    cv2.cvtColor(hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)
def get_aug_params(value, center=0):
    """Sample one random affine parameter.

    Args:
        value: either a single number ``v`` — sample uniformly from
            ``[center - v, center + v]`` — or a two-element sequence
            ``(lo, hi)`` — sample uniformly from ``[lo, hi]`` (``center``
            is ignored in that case).
        center: midpoint of the symmetric sampling interval.

    Returns:
        float: the sampled parameter.

    Raises:
        ValueError: if ``value`` is a sequence whose length is not 2.
    """
    # Accept ints as well as floats: get_affine_matrix / random_affine use
    # integer defaults (degrees=10, shear=10), which a float-only check
    # would reject with a confusing ``TypeError`` from ``len(int)``.
    if isinstance(value, (int, float)):
        return random.uniform(center - value, center + value)
    elif len(value) == 2:
        return random.uniform(value[0], value[1])
    else:
        raise ValueError(
            "Affine params should be either a sequence containing two values\
            or single float values. Got {}".format(value)
        )
def get_affine_matrix(
    target_size,
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Build a random 2x3 affine matrix: rotation+scale, then shear, then
    translation.

    Args:
        target_size: (width, height) of the output image; translation is
            sampled as a fraction of these dimensions.
        degrees, translate, scales, shear: ranges passed to
            ``get_aug_params`` for each component.

    Returns:
        (M, scale): the 2x3 float matrix and the sampled scale factor.

    Raises:
        ValueError: if the sampled scale is not positive.
    """
    twidth, theight = target_size

    # Rotation and isotropic scale about the origin.
    angle = get_aug_params(degrees)
    scale = get_aug_params(scales, center=1.0)
    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")
    R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)

    # Shear: mix each row of R with a tangent of a random angle.
    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)
    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)

    M = np.empty((2, 3))
    M[0] = R[0] + shear_y * R[1]
    M[1] = R[1] + shear_x * R[0]

    # Random translation in pixels goes into the last column.
    M[0, 2] = get_aug_params(translate) * twidth
    M[1, 2] = get_aug_params(translate) * theight
    return M, scale
def apply_affine_to_bboxes(targets, target_size, M, scale):
    """Warp the xyxy boxes in ``targets[:, :4]`` with affine matrix ``M``.

    All four corners of every box are transformed, the axis-aligned hull of
    the warped corners is taken as the new box, and the result is clipped to
    the target image. ``targets`` is modified in place and returned.
    ``scale`` is accepted for interface compatibility but not used here.
    """
    twidth, theight = target_size
    n = len(targets)

    # Homogeneous coordinates of all corners:
    # (x1,y1), (x2,y2), (x1,y2), (x2,y1) for each box.
    corners = np.ones((4 * n, 3))
    corners[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(4 * n, 2)

    # Apply the affine transform, then regroup as one row of 8 values per box.
    corners = (corners @ M.T).reshape(n, 8)
    xs = corners[:, 0::2]
    ys = corners[:, 1::2]

    # Axis-aligned bounding box of the warped corners.
    warped = (
        np.concatenate((xs.min(1), ys.min(1), xs.max(1), ys.max(1)))
        .reshape(4, n)
        .T
    )

    # Clip to the output image extent.
    warped[:, 0::2] = warped[:, 0::2].clip(0, twidth)
    warped[:, 1::2] = warped[:, 1::2].clip(0, theight)

    targets[:, :4] = warped
    return targets
def random_affine(
    img,
    targets=(),
    target_size=(640, 640),
    degrees=10,
    translate=0.1,
    scales=0.1,
    shear=10,
):
    """Apply one random affine warp to an image and its boxes.

    Samples a matrix via ``get_affine_matrix``, warps the image (border
    filled with gray 114) and transforms the box coordinates with the same
    matrix. Returns the warped image and the updated targets.
    """
    M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)

    img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))

    # Boxes must follow exactly the same transform as the pixels.
    if len(targets) > 0:
        targets = apply_affine_to_bboxes(targets, target_size, M, scale)

    return img, targets
def _mirror(image, boxes, prob=0.5): | |
_, width, _ = image.shape | |
if random.random() < prob: | |
image = image[:, ::-1] | |
boxes[:, 0::2] = width - boxes[:, 2::-2] | |
return image, boxes | |
def preproc(img, input_size, swap=(2, 0, 1)):
    """Letterbox-resize ``img`` to ``input_size`` and reorder its axes.

    The image is scaled with preserved aspect ratio to fit inside
    ``input_size`` (height, width), pasted onto a gray (114) canvas,
    transposed by ``swap`` (default HWC -> CHW), and returned as a
    contiguous float32 array together with the resize ratio.
    """
    if len(img.shape) == 3:
        canvas = np.full((input_size[0], input_size[1], 3), 114, dtype=np.uint8)
    else:
        canvas = np.full(input_size, 114, dtype=np.uint8)

    # One ratio for both axes so the aspect ratio is preserved.
    ratio = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_w = int(img.shape[1] * ratio)
    new_h = int(img.shape[0] * ratio)

    resized = cv2.resize(
        img, (new_w, new_h), interpolation=cv2.INTER_LINEAR
    ).astype(np.uint8)
    canvas[:new_h, :new_w] = resized  # top-left placement; rest stays gray

    canvas = canvas.transpose(swap)
    canvas = np.ascontiguousarray(canvas, dtype=np.float32)
    return canvas, ratio
class TrainTransform:
    """Training-time transform: HSV jitter, random horizontal flip,
    letterbox resize, xyxy -> cxcywh conversion and fixed-size label padding.

    Args:
        max_labels: number of label rows in the returned (max_labels, 5)
            array; extra ground truths are dropped, missing ones are zeros.
        flip_prob: probability of horizontal flip.
        hsv_prob: probability of HSV jitter.
    """

    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
        self.max_labels = max_labels
        self.flip_prob = flip_prob
        self.hsv_prob = hsv_prob

    def __call__(self, image, targets, input_dim):
        """Augment one sample.

        Args:
            image: HWC BGR image array.
            targets: (N, 5) array of [x1, y1, x2, y2, class].
            input_dim: (height, width) network input size.

        Returns:
            (image, labels): preprocessed CHW float32 image and a
            (max_labels, 5) float32 array of [class, cx, cy, w, h].
        """
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()

        # No ground truth: just resize and return an all-zero label block.
        if len(boxes) == 0:
            image, _ = preproc(image, input_dim)
            return image, np.zeros((self.max_labels, 5), dtype=np.float32)

        # Keep pristine copies so we can fall back to the un-augmented
        # sample if augmentation filters out every box.
        image_o = image.copy()
        targets_o = targets.copy()
        boxes_o = xyxy2cxcywh(targets_o[:, :4])  # [x1,y1,x2,y2] -> [cx,cy,w,h]
        labels_o = targets_o[:, 4]

        if random.random() < self.hsv_prob:
            augment_hsv(image)  # in-place color jitter
        image_t, boxes = _mirror(image, boxes, self.flip_prob)
        image_t, ratio = preproc(image_t, input_dim)

        # [x1,y1,x2,y2] -> [cx,cy,w,h], then rescale to network input size.
        boxes = xyxy2cxcywh(boxes)
        boxes *= ratio

        # Drop boxes that became degenerate (min side <= 1 px) after resize.
        keep = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[keep]
        labels_t = labels[keep]

        if len(boxes_t) == 0:
            # Everything was filtered out: use the un-augmented sample.
            image_t, ratio_o = preproc(image_o, input_dim)
            boxes_o *= ratio_o
            boxes_t = boxes_o
            labels_t = labels_o

        targets_t = np.hstack((np.expand_dims(labels_t, 1), boxes_t))

        # Pad (or truncate) to a fixed number of label rows.
        n_keep = min(len(targets_t), self.max_labels)
        padded_labels = np.zeros((self.max_labels, 5))
        padded_labels[:n_keep] = targets_t[:n_keep]
        return image_t, np.ascontiguousarray(padded_labels, dtype=np.float32)
class ValTransform:
    """Validation/test-time preprocessing.

    Letterbox-resizes the input image to ``input_size`` and reorders its
    axes according to ``swap``. In ``legacy`` mode it additionally reverses
    the channel order (BGR -> RGB), scales to [0, 1] and applies ImageNet
    mean/std normalization.

    Arguments:
        swap ((int,int,int)): final order of channels (default HWC -> CHW)
        legacy (bool): enable legacy normalization

    Returns:
        a callable transform applied to test/val data; each call returns
        the preprocessed image and a dummy (1, 5) zero label array.
    """

    def __init__(self, swap=(2, 0, 1), legacy=False):
        self.swap = swap
        self.legacy = legacy

    # Input is assumed to be a cv2 (numpy BGR) image.
    def __call__(self, img, res, input_size):
        img, _ = preproc(img, input_size, self.swap)
        if self.legacy:
            img = img[::-1, :, :].copy()  # BGR -> RGB (channel-first layout)
            img /= 255.0
            # In-place ops keep the float32 dtype from preproc.
            img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
            img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
        return img, np.zeros((1, 5))