import matplotlib.pyplot as plt |
import numpy as np |
import torch |
def imshow(ims, ax=None, t=0, vmin=None, vmax=None, title=None, cmap=None, fontsize=20):
    """
    Draws frame `t` of a stack of torch images on a matplotlib axis.

    Args:
        ims: Indexable collection of torch tensors. `ims[t]` is cast to float,
            moved to the CPU and transposed with (1, 2, 0), so it must be
            three-dimensional (presumably [C,H,W] -- confirm against callers).
        ax (optional): Axis to draw on. A new figure with a single axis is
            created when None.
        t (int, optional): Index of the frame to display. Defaults to 0.
        vmin (float, optional): Lower colour limit forwarded to `ax.imshow`.
        vmax (float, optional): Upper colour limit forwarded to `ax.imshow`.
        title (str, optional): Axis title. No title is set when None.
        cmap (optional): Colormap forwarded to `ax.imshow`.
        fontsize (int, optional): Font size of the title. Defaults to 20.

    Returns:
        tuple: (object returned by `ax.imshow`, axis that was drawn on)
    """
    if ax is None:
        _, ax = plt.subplots(1, 1)

    # Indexing under no_grad yields a tensor that does not require grad, which
    # is what allows the .numpy() conversion below.
    with torch.no_grad():
        frame = ims[t].float().cpu().numpy().transpose((1, 2, 0))

    # Forward every display option on its own, so that `cmap` or a single
    # colour limit is no longer dropped when the other options are missing.
    show_kwargs = {}
    if vmin is not None:
        show_kwargs["vmin"] = vmin
    if vmax is not None:
        show_kwargs["vmax"] = vmax
    if cmap is not None:
        show_kwargs["cmap"] = cmap
    elif "vmin" in show_kwargs and "vmax" in show_kwargs:
        # Kept for backward compatibility: an explicit colour range without a
        # colormap has always pinned 'viridis'.
        show_kwargs["cmap"] = "viridis"

    handle = ax.imshow(frame, **show_kwargs)
    if title is not None:
        ax.set_title(title, fontsize=fontsize)
    return (handle, ax)
def make_colorwheel():
    """
    Builds the color wheel used for optical flow visualization, as presented in:
        Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
        URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf

    The code follows the original C++ source code of Daniel Scharstein and
    the Matlab source code of Deqing Sun.

    Returns:
        np.ndarray: Color wheel of shape [55,3] with values in [0, 255]
    """
    # One entry per hue transition:
    # (number of steps, channel held at 255, channel that ramps, ramp rises?)
    segments = (
        (15, 0, 1, True),    # red     -> yellow
        (6, 1, 0, False),    # yellow  -> green
        (4, 1, 2, True),     # green   -> cyan
        (11, 2, 1, False),   # cyan    -> blue
        (13, 2, 0, True),    # blue    -> magenta
        (6, 0, 2, False),    # magenta -> red
    )

    total_steps = sum(steps for steps, _, _, _ in segments)
    wheel = np.zeros((total_steps, 3))

    start = 0
    for steps, full_channel, ramp_channel, rising in segments:
        stop = start + steps
        ramp = np.floor(255 * np.arange(0, steps) / steps)
        wheel[start:stop, full_channel] = 255
        wheel[start:stop, ramp_channel] = ramp if rising else 255 - ramp
        start = stop
    return wheel
def flow_uv_to_colors(u, v, convert_to_bgr=False):
    """
    Maps flow components u and v (possibly clipped) onto the flow color wheel.

    The flow direction selects a hue, linearly interpolated between the two
    neighbouring wheel entries, and the flow magnitude controls saturation.
    Follows the C++ source code of Daniel Scharstein and the Matlab source
    code of Deqing Sun.

    Args:
        u (np.ndarray): Input horizontal flow of shape [H,W]
        v (np.ndarray): Input vertical flow of shape [H,W]
        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.

    Returns:
        np.ndarray: Flow visualization image of shape [H,W,3], dtype uint8
    """
    rendered = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
    wheel = make_colorwheel()
    n_hues, n_channels = wheel.shape

    magnitude = np.sqrt(np.square(u) + np.square(v))
    # arctan2 yields [-pi, pi]; rescale to a fractional wheel position in [0, n_hues-1].
    angle = np.arctan2(-v, -u) / np.pi
    wheel_pos = (angle + 1) / 2 * (n_hues - 1)

    lower = np.floor(wheel_pos).astype(np.int32)
    upper = lower + 1
    upper[upper == n_hues] = 0  # wrap around to the start of the wheel
    blend = wheel_pos - lower

    # The magnitude mask does not depend on the channel, so build it once.
    inside = magnitude <= 1
    outside = ~inside

    for channel in range(n_channels):
        hues = wheel[:, channel]
        shade = (1 - blend) * (hues[lower] / 255.0) + blend * (hues[upper] / 255.0)
        # Within the unit radius: fade towards white as the magnitude shrinks.
        shade[inside] = 1 - magnitude[inside] * (1 - shade[inside])
        # Beyond the unit radius: darken the colour.
        shade[outside] = shade[outside] * 0.75
        target = 2 - channel if convert_to_bgr else channel
        rendered[:, :, target] = np.floor(255 * shade)
    return rendered
def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False):
    """
    Converts a two-channel flow field into a color visualization.

    The flow is normalized by its largest magnitude before being mapped onto
    the color wheel.

    Args:
        flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
        clip_flow (float, optional): Bound on the magnitude of each flow
            component; components are clipped to [-clip_flow, clip_flow].
            Defaults to None (no clipping).
        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.

    Returns:
        np.ndarray: Flow visualization image of shape [H,W,3]

    Raises:
        AssertionError: If `flow_uv` is not of shape [H,W,2].
    """
    assert flow_uv.ndim == 3, 'input flow must have three dimensions'
    assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]'
    if clip_flow is not None:
        # Flow components are signed, so clip symmetrically. A lower bound of
        # 0 would erase all motion in the negative x and y directions.
        flow_uv = np.clip(flow_uv, -clip_flow, clip_flow)
    u = flow_uv[:, :, 0]
    v = flow_uv[:, :, 1]
    rad_max = np.max(np.sqrt(np.square(u) + np.square(v)))
    # epsilon keeps the division finite for an all-zero flow field.
    epsilon = 1e-5
    scale = rad_max + epsilon
    return flow_uv_to_colors(u / scale, v / scale, convert_to_bgr)
from decord import VideoReader, cpu |
from PIL import Image |
from torchvision import transforms |
def get_video(video_name, num_frames=2, delta_time=4, frame=None):
    """
    Reads `num_frames` frames, spaced `delta_time` frames apart, from a video.

    Prints the average fps of the video and the chosen start frame.

    Args:
        video_name: Video source handed to decord's `VideoReader`.
        num_frames (int, optional): Number of frames to read. Defaults to 2.
        delta_time (int, optional): Step between consecutive frame indices. Defaults to 4.
        frame (int, optional): Index of the first frame. When None, the start
            is drawn with `rng.randint`.
            NOTE(review): `rng` is not defined in this part of the file --
            confirm it exists at module level.

    Returns:
        tuple: (torch tensor stacking the `ToTensor`-converted RGB frames along
        a new first dimension, index of the first frame)
    """
    reader = VideoReader(video_name, num_threads=1, ctx=cpu(0))
    span = num_frames * delta_time
    # Largest start index handed to the random draw.
    max_end_ind = len(reader) - span - 1
    if frame is None:
        start_frame = rng.randint(1, max_end_ind)
    else:
        start_frame = frame

    print("fps", reader.get_avg_fps())
    print("start frame = %d" % start_frame)

    indices = list(range(start_frame, start_frame + span, delta_time))
    raw_frames = reader.get_batch(indices).asnumpy()

    to_tensor = transforms.ToTensor()
    tensors = [to_tensor(Image.fromarray(raw).convert('RGB')) for raw in raw_frames]
    return (torch.stack(tensors, 0), start_frame)