import math
import random

import numpy as np
import torch
import cv2
import av
from PIL import Image
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize

# ffmpeg-python is optional; it is only required by the "ffmpeg" decoding backend.
try:
    import ffmpeg
except ImportError:
    ffmpeg = None

def random_short_side_scale_jitter(
    images, min_size, max_size, boxes=None, inverse_uniform_sampling=False
):
    """
    Perform a spatial short-side scale jittering on the given images and
    corresponding boxes.
    Args:
        images (tensor): images to perform scale jitter. Dimension is
            `num frames` x `channel` x `height` x `width`.
        min_size (int): the minimal size to scale the frames.
        max_size (int): the maximal size to scale the frames.
        boxes (ndarray): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
        inverse_uniform_sampling (bool): if True, sample uniformly in
            [1 / max_size, 1 / min_size] and take a reciprocal to get the
            scale. If False, take a uniform sample from [min_size, max_size].
    Returns:
        (tensor): the scaled images with dimension of
            `num frames` x `channel` x `new height` x `new width`.
        (ndarray or None): the scaled boxes with dimension of
            `num boxes` x 4.
    """
    if inverse_uniform_sampling:
        size = int(
            round(1.0 / np.random.uniform(1.0 / max_size, 1.0 / min_size))
        )
    else:
        size = int(round(np.random.uniform(min_size, max_size)))

    height = images.shape[2]
    width = images.shape[3]
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return images, boxes
    new_width = size
    new_height = size
    if width < height:
        new_height = int(math.floor((float(height) / width) * size))
        if boxes is not None:
            boxes = boxes * float(new_height) / height
    else:
        new_width = int(math.floor((float(width) / height) * size))
        if boxes is not None:
            boxes = boxes * float(new_width) / width

    return (
        torch.nn.functional.interpolate(
            images,
            size=(new_height, new_width),
            mode="bilinear",
            align_corners=False,
        ),
        boxes,
    )
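

# Illustrative sketch (not part of the original module): how the scale jitter is
# typically called on a clip tensor of shape (num_frames, channels, H, W).
# The clip and box values below are synthetic placeholders.
def _demo_random_short_side_scale_jitter():
    clip = torch.rand(8, 3, 240, 320)                   # 8 frames, 240x320
    boxes = np.array([[10.0, 20.0, 100.0, 200.0]])      # one box, (x1, y1, x2, y2)
    scaled_clip, scaled_boxes = random_short_side_scale_jitter(
        clip, min_size=256, max_size=320, boxes=boxes
    )
    # The short side is resized to a value in [256, 320]; the long side and the
    # boxes are rescaled by the same factor.
    print(scaled_clip.shape, scaled_boxes)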


def crop_boxes(boxes, x_offset, y_offset):
    """
    Perform crop on the bounding boxes given the offsets.
    Args:
        boxes (ndarray or None): bounding boxes to perform crop. The dimension
            is `num boxes` x 4.
        x_offset (int): cropping offset in the x axis.
        y_offset (int): cropping offset in the y axis.
    Returns:
        cropped_boxes (ndarray or None): the cropped boxes with dimension of
            `num boxes` x 4.
    """
    cropped_boxes = boxes.copy()
    cropped_boxes[:, [0, 2]] = boxes[:, [0, 2]] - x_offset
    cropped_boxes[:, [1, 3]] = boxes[:, [1, 3]] - y_offset

    return cropped_boxes


def random_crop(images, size, boxes=None):
    """
    Perform random spatial crop on the given images and corresponding boxes.
    Args:
        images (tensor): images to perform random crop. The dimension is
            `num frames` x `channel` x `height` x `width`.
        size (int): the size of height and width to crop on the image.
        boxes (ndarray or None): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        cropped (tensor): cropped images with dimension of
            `num frames` x `channel` x `size` x `size`.
        cropped_boxes (ndarray or None): the cropped boxes with dimension of
            `num boxes` x 4.
    """
    if images.shape[2] == size and images.shape[3] == size:
        # Nothing to crop; return the boxes unchanged rather than dropping them.
        return images, boxes
    height = images.shape[2]
    width = images.shape[3]
    y_offset = 0
    if height > size:
        y_offset = int(np.random.randint(0, height - size))
    x_offset = 0
    if width > size:
        x_offset = int(np.random.randint(0, width - size))
    cropped = images[
        :, :, y_offset : y_offset + size, x_offset : x_offset + size
    ]

    cropped_boxes = (
        crop_boxes(boxes, x_offset, y_offset) if boxes is not None else None
    )

    return cropped, cropped_boxes


def horizontal_flip(prob, images, boxes=None):
    """
    Perform horizontal flip on the given images and corresponding boxes.
    Args:
        prob (float): probability to flip the images.
        images (tensor): images to perform horizontal flip, the dimension is
            `num frames` x `channel` x `height` x `width`.
        boxes (ndarray or None): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        images (tensor): images with dimension of
            `num frames` x `channel` x `height` x `width`.
        flipped_boxes (ndarray or None): the flipped boxes with dimension of
            `num boxes` x 4.
    """
    if boxes is None:
        flipped_boxes = None
    else:
        flipped_boxes = boxes.copy()

    if np.random.uniform() < prob:
        images = images.flip((-1))

        width = images.shape[3]
        if boxes is not None:
            flipped_boxes[:, [0, 2]] = width - boxes[:, [2, 0]] - 1

    return images, flipped_boxes


def uniform_crop(images, size, spatial_idx, boxes=None):
    """
    Perform uniform spatial sampling on the images and corresponding boxes.
    Args:
        images (tensor): images to perform uniform crop. The dimension is
            `num frames` x `channel` x `height` x `width`.
        size (int): size of height and width to crop the images.
        spatial_idx (int): 0, 1, or 2 for left, center, and right crop if width
            is larger than height. Or 0, 1, or 2 for top, center, and bottom
            crop if height is larger than width.
        boxes (ndarray or None): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        cropped (tensor): images with dimension of
            `num frames` x `channel` x `size` x `size`.
        cropped_boxes (ndarray or None): the cropped boxes with dimension of
            `num boxes` x 4.
    """
    assert spatial_idx in [0, 1, 2]
    height = images.shape[2]
    width = images.shape[3]

    y_offset = int(math.ceil((height - size) / 2))
    x_offset = int(math.ceil((width - size) / 2))

    if height > width:
        if spatial_idx == 0:
            y_offset = 0
        elif spatial_idx == 2:
            y_offset = height - size
    else:
        if spatial_idx == 0:
            x_offset = 0
        elif spatial_idx == 2:
            x_offset = width - size
    cropped = images[
        :, :, y_offset : y_offset + size, x_offset : x_offset + size
    ]

    cropped_boxes = (
        crop_boxes(boxes, x_offset, y_offset) if boxes is not None else None
    )

    return cropped, cropped_boxes
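

# Illustrative sketch (not part of the original module): the common 3-crop
# evaluation pattern built on top of uniform_crop. The short side is resized
# first, then the three spatial indices cover the whole long side.
# The input clip below is synthetic.
def _demo_three_crop_eval():
    clip = torch.rand(8, 3, 240, 426)
    clip, _ = random_short_side_scale_jitter(clip, min_size=224, max_size=224)
    crops = [uniform_crop(clip, size=224, spatial_idx=idx)[0] for idx in (0, 1, 2)]
    # Three 224x224 views: left/top, center, and right/bottom of the clip.
    print([c.shape for c in crops])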


def uniform_crop_2crops(images, size, spatial_idx, boxes=None):
    """
    Perform uniform spatial sampling on the images and corresponding boxes,
    taking one of two crops along the longer side.
    Args:
        images (tensor): images to perform uniform crop. The dimension is
            `num frames` x `channel` x `height` x `width`.
        size (int): size of height and width to crop the images.
        spatial_idx (int): 0 or 1 for the first and second crop along the
            longer side (vertical if height > width, horizontal otherwise).
        boxes (ndarray or None): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        cropped (tensor): images with dimension of
            `num frames` x `channel` x `size` x `size`.
        cropped_boxes (ndarray or None): the cropped boxes with dimension of
            `num boxes` x 4.
    """
    assert spatial_idx in [0, 1]
    height = images.shape[2]
    width = images.shape[3]

    if height > width:
        x_offset = 0
        if height > size * 2:
            if spatial_idx == 0:
                y_offset = int((height - size * 2) // 2)
            elif spatial_idx == 1:
                y_offset = int(height - size - ((height - size * 2) // 2))
        else:
            if spatial_idx == 0:
                y_offset = 0
            elif spatial_idx == 1:
                y_offset = height - size
    else:
        y_offset = 0
        if width > size * 2:
            if spatial_idx == 0:
                x_offset = int((width - size * 2) // 2)
            elif spatial_idx == 1:
                x_offset = int(width - size - ((width - size * 2) // 2))
        else:
            if spatial_idx == 0:
                x_offset = 0
            elif spatial_idx == 1:
                x_offset = width - size

    cropped = images[
        :, :, y_offset : y_offset + size, x_offset : x_offset + size
    ]

    cropped_boxes = (
        crop_boxes(boxes, x_offset, y_offset) if boxes is not None else None
    )

    return cropped, cropped_boxes


def clip_boxes_to_image(boxes, height, width):
    """
    Clip an array of boxes to an image with the given height and width.
    Args:
        boxes (ndarray): bounding boxes to perform clipping.
            Dimension is `num boxes` x 4.
        height (int): given image height.
        width (int): given image width.
    Returns:
        clipped_boxes (ndarray): the clipped boxes with dimension of
            `num boxes` x 4.
    """
    clipped_boxes = boxes.copy()
    clipped_boxes[:, [0, 2]] = np.minimum(
        width - 1.0, np.maximum(0.0, boxes[:, [0, 2]])
    )
    clipped_boxes[:, [1, 3]] = np.minimum(
        height - 1.0, np.maximum(0.0, boxes[:, [1, 3]])
    )
    return clipped_boxes


def blend(images1, images2, alpha):
    """
    Blend two images with a given weight alpha.
    Args:
        images1 (tensor): the first images to be blended, the dimension is
            `num frames` x `channel` x `height` x `width`.
        images2 (tensor): the second images to be blended, the dimension is
            `num frames` x `channel` x `height` x `width`.
        alpha (float): the blending weight.
    Returns:
        (tensor): blended images, the dimension is
            `num frames` x `channel` x `height` x `width`.
    """
    return images1 * alpha + images2 * (1 - alpha)


def grayscale(images):
    """
    Get the grayscale for the input images. The channels of images should be
    in order BGR.
    Args:
        images (tensor): the input images for getting grayscale. Dimension is
            `num frames` x `channel` x `height` x `width`.
    Returns:
        img_gray (tensor): grayscale images, the dimension is
            `num frames` x `channel` x `height` x `width`.
    """
    # ITU-R 601 luma weights; the input is assumed to be BGR, so index 2 is R.
    img_gray = images.clone()
    gray_channel = (
        0.299 * images[:, 2] + 0.587 * images[:, 1] + 0.114 * images[:, 0]
    )
    img_gray[:, 0] = gray_channel
    img_gray[:, 1] = gray_channel
    img_gray[:, 2] = gray_channel
    return img_gray


def color_jitter(images, img_brightness=0, img_contrast=0, img_saturation=0):
    """
    Perform color jittering on the input images. The channels of images
    should be in order BGR.
    Args:
        images (tensor): images to perform color jitter. Dimension is
            `num frames` x `channel` x `height` x `width`.
        img_brightness (float): jitter ratio for brightness.
        img_contrast (float): jitter ratio for contrast.
        img_saturation (float): jitter ratio for saturation.
    Returns:
        images (tensor): the jittered images, the dimension is
            `num frames` x `channel` x `height` x `width`.
    """
    jitter = []
    if img_brightness != 0:
        jitter.append("brightness")
    if img_contrast != 0:
        jitter.append("contrast")
    if img_saturation != 0:
        jitter.append("saturation")

    if len(jitter) > 0:
        # Apply the enabled jitters in a random order.
        order = np.random.permutation(np.arange(len(jitter)))
        for idx in range(0, len(jitter)):
            if jitter[order[idx]] == "brightness":
                images = brightness_jitter(img_brightness, images)
            elif jitter[order[idx]] == "contrast":
                images = contrast_jitter(img_contrast, images)
            elif jitter[order[idx]] == "saturation":
                images = saturation_jitter(img_saturation, images)
    return images


def brightness_jitter(var, images):
    """
    Perform brightness jittering on the input images. The channels of images
    should be in order BGR.
    Args:
        var (float): jitter ratio for brightness.
        images (tensor): images to perform color jitter. Dimension is
            `num frames` x `channel` x `height` x `width`.
    Returns:
        images (tensor): the jittered images, the dimension is
            `num frames` x `channel` x `height` x `width`.
    """
    alpha = 1.0 + np.random.uniform(-var, var)

    # Blending with a black image scales the brightness by alpha.
    img_bright = torch.zeros_like(images)
    images = blend(images, img_bright, alpha)
    return images


def contrast_jitter(var, images):
    """
    Perform contrast jittering on the input images. The channels of images
    should be in order BGR.
    Args:
        var (float): jitter ratio for contrast.
        images (tensor): images to perform color jitter. Dimension is
            `num frames` x `channel` x `height` x `width`.
    Returns:
        images (tensor): the jittered images, the dimension is
            `num frames` x `channel` x `height` x `width`.
    """
    alpha = 1.0 + np.random.uniform(-var, var)

    # Blending with the mean gray value scales the contrast by alpha.
    img_gray = grayscale(images)
    img_gray[:] = torch.mean(img_gray, dim=(1, 2, 3), keepdim=True)
    images = blend(images, img_gray, alpha)
    return images


def saturation_jitter(var, images):
    """
    Perform saturation jittering on the input images. The channels of images
    should be in order BGR.
    Args:
        var (float): jitter ratio for saturation.
        images (tensor): images to perform color jitter. Dimension is
            `num frames` x `channel` x `height` x `width`.
    Returns:
        images (tensor): the jittered images, the dimension is
            `num frames` x `channel` x `height` x `width`.
    """
    alpha = 1.0 + np.random.uniform(-var, var)

    # Blending with the grayscale image scales the saturation by alpha.
    img_gray = grayscale(images)
    images = blend(images, img_gray, alpha)

    return images


def lighting_jitter(images, alphastd, eigval, eigvec):
    """
    Perform AlexNet-style PCA jitter on the given images.
    Args:
        images (tensor): images to perform lighting jitter. Dimension is
            `num frames` x `channel` x `height` x `width`.
        alphastd (float): jitter ratio for PCA jitter.
        eigval (list): eigenvalues for PCA jitter.
        eigvec (list[list]): eigenvectors for PCA jitter.
    Returns:
        out_images (tensor): the jittered images, the dimension is
            `num frames` x `channel` x `height` x `width`.
    """
    if alphastd == 0:
        return images

    alpha = np.random.normal(0, alphastd, size=(1, 3))
    eig_vec = np.array(eigvec)
    eig_val = np.reshape(eigval, (1, 3))
    rgb = np.sum(
        eig_vec * np.repeat(alpha, 3, axis=0) * np.repeat(eig_val, 3, axis=0),
        axis=1,
    )
    out_images = torch.zeros_like(images)
    for idx in range(images.shape[1]):
        # Channels are assumed to be BGR, so the RGB offsets are applied in reverse.
        out_images[:, idx] = images[:, idx] + rgb[2 - idx]

    return out_images


def color_normalization(images, mean, stddev):
    """
    Perform color normalization on the given images.
    Args:
        images (tensor): images to perform color normalization. Dimension is
            `num frames` x `channel` x `height` x `width`.
        mean (list): mean values for normalization.
        stddev (list): standard deviations for normalization.
    Returns:
        out_images (tensor): the normalized images, the dimension is
            `num frames` x `channel` x `height` x `width`.
    """
    assert len(mean) == images.shape[1], "channel mean not computed properly"
    assert (
        len(stddev) == images.shape[1]
    ), "channel stddev not computed properly"

    out_images = torch.zeros_like(images)
    for idx in range(len(mean)):
        out_images[:, idx] = (images[:, idx] - mean[idx]) / stddev[idx]

    return out_images
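

# Illustrative sketch (not part of the original module): chaining the transforms
# above into a typical training-time augmentation for a BGR clip tensor.
# The clip, jitter ratios, and normalization statistics are placeholders.
def _demo_train_augmentation():
    clip = torch.rand(8, 3, 240, 320) * 255.0           # synthetic BGR clip in [0, 255]
    clip, _ = random_short_side_scale_jitter(clip, min_size=256, max_size=320)
    clip, _ = random_crop(clip, size=224)
    clip, _ = horizontal_flip(prob=0.5, images=clip)
    clip = color_jitter(clip, img_brightness=0.4, img_contrast=0.4, img_saturation=0.4)
    clip = color_normalization(clip, mean=[110.0, 115.0, 120.0], stddev=[58.0, 57.0, 57.0])
    print(clip.shape)  # (8, 3, 224, 224)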


class RawVideoExtractorCV2:
    def __init__(self, centercrop=False, size=224, framerate=-1):
        self.centercrop = centercrop
        self.size = size
        self.framerate = framerate
        self.transform = self._transform(self.size)

        # CLIP normalization statistics, scaled to the [0, 255] pixel range used
        # by the ffmpeg backend.
        self.mean = np.array((0.48145466, 0.4578275, 0.40821073)).reshape(1, 1, 1, 3) * 255
        self.std = np.array((0.26862954, 0.26130258, 0.27577711)).reshape(1, 1, 1, 3) * 255

    def _transform(self, n_px):
        return Compose([
            Resize(n_px, interpolation=Image.BICUBIC),
            CenterCrop(n_px),
            lambda image: image.convert("RGB"),
            ToTensor(),
            Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
        ])

    def video_to_tensor(self, video_file, preprocess, sample_fp=0, num_frames=50,
                        sample_offset=0, start_time=None, end_time=None, impl="pyav"):
        if start_time is not None or end_time is not None:
            assert isinstance(start_time, int) and isinstance(end_time, int) \
                and start_time > -1 and end_time > start_time

        if impl == "cv2":
            # Probe the frame count / fps and optionally seek to start_time.
            cap = cv2.VideoCapture(video_file)
            frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = int(cap.get(cv2.CAP_PROP_FPS))

            total_duration = (frameCount + fps - 1) // fps
            start_sec, end_sec = 0, total_duration

            if start_time is not None:
                start_sec, end_sec = start_time, end_time if end_time <= total_duration else total_duration
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(start_time * fps))

        ret = True
        images, included = [], []

        if sample_fp > -1:
            # Sample at a fixed rate of `sample_fp` frames per second.
            if impl == "cv2":
                interval = 1
                if sample_fp > 0:
                    interval = fps // sample_fp
                else:
                    sample_fp = fps
                if interval == 0:
                    interval = 1

                inds = [ind for ind in np.arange(0, fps, interval)]
                assert len(inds) >= sample_fp
                inds = inds[:sample_fp]

                offset = min(sample_offset, interval - 1) if sample_offset > 0 else random.randint(0, interval - 1)
                for sec in np.arange(start_sec, end_sec + 1):
                    if not ret:
                        break
                    sec_base = int(sec * fps + offset)
                    for ind in inds:
                        cap.set(cv2.CAP_PROP_POS_FRAMES, sec_base + ind)
                        ret, frame = cap.read()
                        if not ret:
                            break
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        images.append(preprocess(Image.fromarray(frame_rgb).convert("RGB")))

                if len(images) > 0:
                    video_data = torch.tensor(np.stack(images))
                else:
                    video_data = torch.zeros(1)
                cap.release()

            elif impl == "ffmpeg":
                # Let ffmpeg do the frame selection, square crop, and resize in one pass.
                if sample_fp == 0:
                    sample_fp = 1000

                out, _ = (
                    ffmpeg
                    .input(video_file)
                    .filter('select', 'isnan(prev_selected_t)+gte(t-prev_selected_t,{})'.format(1 / sample_fp))
                    .filter('crop', 'min(in_w, in_h)', 'min(in_w, in_h)', '(in_w - min(in_w, in_h)) / 2', '(in_h - min(in_w, in_h)) / 2')
                    .filter('scale', self.size, self.size)
                    .output('pipe:', format='rawvideo', pix_fmt='rgb24', vsync='vfr')
                    .global_args('-loglevel', 'quiet')
                    .run(capture_stdout=True)
                )
                video = (
                    np
                    .frombuffer(out, np.uint8)
                    .reshape([-1, self.size, self.size, 3])
                )

                video = (video - self.mean) / self.std
                video_data = torch.as_tensor(video).permute(0, 3, 1, 2)

            elif impl == 'pyav':
                images = list()
                container = av.open(video_file)
                container.streams.video[0].thread_type = "AUTO"
                stream = container.streams.video[0]
                total_frames = stream.frames
                assert total_frames != 0
                duration = int(stream.duration * stream.time_base)

                # Keep every `interval`-th frame to approximate `sample_fp` frames per second.
                if sample_fp > 0:
                    interval = max(int(total_frames / duration / sample_fp), 1)
                else:
                    interval = 1
                for frame in container.decode(stream):
                    if frame.index % interval != 0:
                        continue
                    images.append(preprocess(frame.to_rgb().to_image()))

                if len(images) > 0:
                    video_data = torch.stack(images)
                else:
                    video_data = torch.zeros(1)
                container.close()

            else:
                raise NotImplementedError

        else:
            # Sample a fixed number of frames spread evenly over the whole video.
            if impl == "cv2":
                interval = max(frameCount // num_frames, 1)
                start = min(sample_offset, interval - 1) if sample_offset > -1 else random.randint(0, interval - 1)
                interval = frameCount / num_frames
                for i in range(num_frames):
                    cap.set(cv2.CAP_PROP_POS_FRAMES, start + int(i * interval))
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    images.append(preprocess(Image.fromarray(frame_rgb).convert("RGB")))

                if len(images) > 0:
                    video_data = torch.tensor(np.stack(images))
                else:
                    video_data = torch.zeros(1)
                cap.release()
            elif impl == "pyav":
                images = list()
                container = av.open(video_file)
                container.streams.video[0].thread_type = "AUTO"
                stream = container.streams.video[0]
                total_frames = stream.frames
                assert total_frames != 0
                interval = max(total_frames // num_frames, 1)
                for frame in container.decode(stream):
                    if frame.index % interval != 0:
                        continue
                    images.append(preprocess(frame.to_rgb().to_image()))

                if len(images) > 0:
                    video_data = torch.stack(images)
                else:
                    video_data = torch.zeros(1)
                container.close()
            else:
                raise NotImplementedError

        return {'video': video_data}

    def get_video_data(self, video_path, num_frames, sample_offset, start_time=None, end_time=None):
        image_input = self.video_to_tensor(video_path, self.transform, sample_fp=self.framerate,
                                           num_frames=num_frames, sample_offset=sample_offset,
                                           start_time=start_time, end_time=end_time)
        return image_input

    def process_raw_data(self, raw_video_data):
        tensor_size = raw_video_data.size()
        tensor = raw_video_data.view(-1, 1, tensor_size[-3], tensor_size[-2], tensor_size[-1])
        return tensor

    def process_frame_order(self, raw_video_data, frame_order=0):
        # 0: keep the original frame order, 1: reverse it, 2: shuffle it randomly.
        if frame_order == 0:
            pass
        elif frame_order == 1:
            reverse_order = np.arange(raw_video_data.size(0) - 1, -1, -1)
            raw_video_data = raw_video_data[reverse_order, ...]
        elif frame_order == 2:
            random_order = np.arange(raw_video_data.size(0))
            np.random.shuffle(random_order)
            raw_video_data = raw_video_data[random_order, ...]

        return raw_video_data


RawVideoExtractor = RawVideoExtractorCV2
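

# Illustrative sketch (not part of the original module): extracting a fixed number
# of frames from a video file with the default PyAV backend. The file path below
# is a placeholder.
if __name__ == "__main__":
    extractor = RawVideoExtractor(centercrop=True, size=224, framerate=-1)
    data = extractor.get_video_data("example.mp4", num_frames=12, sample_offset=0)
    video = data["video"]                     # (num sampled frames, 3, 224, 224)
    video = extractor.process_frame_order(video, frame_order=0)
    print(video.shape)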