# yolo/tools/data_augmentation.py
from typing import List
import numpy as np
import torch
from PIL import Image
from torchvision.transforms import functional as TF
class AugmentationComposer:
"""Composes several transforms together."""
    def __init__(self, transforms, image_size: List[int] = [640, 640], base_size: int = 640):
self.transforms = transforms
        # TODO: handle image_size given as a single int as well as a [width, height] list
self.pad_resize = PadAndResize(image_size)
self.base_size = base_size
for transform in self.transforms:
if hasattr(transform, "set_parent"):
transform.set_parent(self)
    def __call__(self, image, boxes=None):
        if boxes is None:  # avoid a shared tensor default being reused across calls
            boxes = torch.zeros(0, 5)
for transform in self.transforms:
image, boxes = transform(image, boxes)
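        # Letterbox to the target size; rev_tensor stores [scale, pad_left, pad_top,
        # pad_left, pad_top] so predictions can be mapped back to the source image.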
image, boxes, rev_tensor = self.pad_resize(image, boxes)
image = TF.to_tensor(image)
return image, boxes, rev_tensor
class RemoveOutliers:
"""Removes outlier bounding boxes that are too small or have invalid dimensions."""
def __init__(self, min_box_area=1e-8):
"""
Args:
min_box_area (float): Minimum area for a box to be kept, as a fraction of the image area.
"""
self.min_box_area = min_box_area
def __call__(self, image, boxes):
"""
Args:
            image (PIL.Image): The input image (unchanged by this transform).
            boxes (torch.Tensor): Bounding boxes in normalized (class, x_min, y_min, x_max, y_max) format.
Returns:
PIL.Image: The input image (unchanged).
torch.Tensor: Filtered bounding boxes.
"""
box_areas = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 4] - boxes[:, 2])
valid_boxes = (box_areas > self.min_box_area) & (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 4] > boxes[:, 2])
return image, boxes[valid_boxes]
class PadAndResize:
def __init__(self, image_size, background_color=(114, 114, 114)):
"""Initialize the object with the target image size."""
self.target_width, self.target_height = image_size
self.background_color = background_color
def set_size(self, image_size: List[int]):
self.target_width, self.target_height = image_size
    def __call__(self, image: Image.Image, boxes):
img_width, img_height = image.size
scale = min(self.target_width / img_width, self.target_height / img_height)
new_width, new_height = int(img_width * scale), int(img_height * scale)
resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
pad_left = (self.target_width - new_width) // 2
pad_top = (self.target_height - new_height) // 2
padded_image = Image.new("RGB", (self.target_width, self.target_height), self.background_color)
padded_image.paste(resized_image, (pad_left, pad_top))
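        # Remap normalized box coordinates through the resize scale and padding offset.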
boxes[:, [1, 3]] = (boxes[:, [1, 3]] * new_width + pad_left) / self.target_width
boxes[:, [2, 4]] = (boxes[:, [2, 4]] * new_height + pad_top) / self.target_height
transform_info = torch.tensor([scale, pad_left, pad_top, pad_left, pad_top])
return padded_image, boxes, transform_info
class HorizontalFlip:
"""Randomly horizontally flips the image along with the bounding boxes."""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, image, boxes):
if torch.rand(1) < self.prob:
image = TF.hflip(image)
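            # Mirror x: new x_min = 1 - old x_max, new x_max = 1 - old x_min.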
boxes[:, [1, 3]] = 1 - boxes[:, [3, 1]]
return image, boxes
class VerticalFlip:
"""Randomly vertically flips the image along with the bounding boxes."""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, image, boxes):
if torch.rand(1) < self.prob:
image = TF.vflip(image)
boxes[:, [2, 4]] = 1 - boxes[:, [4, 2]]
return image, boxes
class Mosaic:
"""Applies the Mosaic augmentation to a batch of images and their corresponding boxes."""
def __init__(self, prob=0.5):
self.prob = prob
self.parent = None
def set_parent(self, parent):
self.parent = parent
def __call__(self, image, boxes):
if torch.rand(1) >= self.prob:
return image, boxes
assert self.parent is not None, "Parent is not set. Mosaic cannot retrieve image size."
        img_sz = self.parent.base_size  # tile size; the mosaic canvas is 2x this before the final resize
        more_data = self.parent.get_more_data(3)  # the parent must supply 3 extra (image, boxes) pairs
data = [(image, boxes)] + more_data
mosaic_image = Image.new("RGB", (2 * img_sz, 2 * img_sz), (114, 114, 114))
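        # Offsets from the canvas center: (-1, -1) -> top-left, (0, -1) -> top-right,
        # (-1, 0) -> bottom-left, (0, 0) -> bottom-right quadrant.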
vectors = np.array([(-1, -1), (0, -1), (-1, 0), (0, 0)])
center = np.array([img_sz, img_sz])
all_labels = []
for (image, boxes), vector in zip(data, vectors):
this_w, this_h = image.size
coord = tuple(center + vector * np.array([this_w, this_h]))
mosaic_image.paste(image, coord)
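            # Map this tile's normalized boxes into the 2 * img_sz mosaic canvas.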
xmin, ymin, xmax, ymax = boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]
xmin = (xmin * this_w + coord[0]) / (2 * img_sz)
xmax = (xmax * this_w + coord[0]) / (2 * img_sz)
ymin = (ymin * this_h + coord[1]) / (2 * img_sz)
ymax = (ymax * this_h + coord[1]) / (2 * img_sz)
adjusted_boxes = torch.stack([boxes[:, 0], xmin, ymin, xmax, ymax], dim=1)
all_labels.append(adjusted_boxes)
all_labels = torch.cat(all_labels, dim=0)
mosaic_image = mosaic_image.resize((img_sz, img_sz))
return mosaic_image, all_labels
class MixUp:
"""Applies the MixUp augmentation to a pair of images and their corresponding boxes."""
def __init__(self, prob=0.5, alpha=1.0):
self.alpha = alpha
self.prob = prob
self.parent = None
def set_parent(self, parent):
"""Set the parent dataset object for accessing dataset methods."""
self.parent = parent
def __call__(self, image, boxes):
if torch.rand(1) >= self.prob:
return image, boxes
assert self.parent is not None, "Parent is not set. MixUp cannot retrieve additional data."
# Retrieve another image and its boxes randomly from the dataset
image2, boxes2 = self.parent.get_more_data()[0]
# Calculate the mixup lambda parameter
lam = np.random.beta(self.alpha, self.alpha) if self.alpha > 0 else 0.5
# Mix images
image1, image2 = TF.to_tensor(image), TF.to_tensor(image2)
mixed_image = lam * image1 + (1 - lam) * image2
        # Merge bounding boxes from both images; no per-box reweighting by lam is applied here
merged_boxes = torch.cat((boxes, boxes2))
return TF.to_pil_image(mixed_image), merged_boxes
class RandomCrop:
"""Randomly crops the image to half its size along with adjusting the bounding boxes."""
def __init__(self, prob=0.5):
"""
Args:
prob (float): Probability of applying the crop.
"""
self.prob = prob
def __call__(self, image, boxes):
if torch.rand(1) < self.prob:
original_width, original_height = image.size
crop_height, crop_width = original_height // 2, original_width // 2
top = torch.randint(0, original_height - crop_height + 1, (1,)).item()
left = torch.randint(0, original_width - crop_width + 1, (1,)).item()
image = TF.crop(image, top, left, crop_height, crop_width)
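            # Denormalize boxes to pixels, shift into the crop frame, clamp to the
            # crop, then renormalize by the crop size; degenerate boxes left by the
            # clamp can be filtered out later by RemoveOutliers.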
boxes[:, [1, 3]] = boxes[:, [1, 3]] * original_width - left
boxes[:, [2, 4]] = boxes[:, [2, 4]] * original_height - top
boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(0, crop_width)
boxes[:, [2, 4]] = boxes[:, [2, 4]].clamp(0, crop_height)
boxes[:, [1, 3]] /= crop_width
boxes[:, [2, 4]] /= crop_height
return image, boxes
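

if __name__ == "__main__":
    # Minimal usage sketch: run one dummy image through the pipeline. Mosaic and
    # MixUp are skipped because they need a parent dataset that provides
    # get_more_data(); only the dataset-free transforms are exercised here.
    dummy_image = Image.new("RGB", (480, 320), (0, 0, 0))
    # One box per row: (class_id, x_min, y_min, x_max, y_max), normalized to [0, 1].
    dummy_boxes = torch.tensor([[0.0, 0.1, 0.2, 0.5, 0.8]])
    composer = AugmentationComposer([HorizontalFlip(prob=1.0), RemoveOutliers()])
    image, boxes, rev_tensor = composer(dummy_image, dummy_boxes)
    print(image.shape, boxes, rev_tensor)  # torch.Size([3, 640, 640]), flipped boxes, letterbox info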