Spaces:
Runtime error
Runtime error
""" | |
# ============================== | |
# flowlib.py | |
# library for optical flow processing | |
# Author: Ruoteng Li | |
# Date: 6th Aug 2016 | |
# ============================== | |
""" | |
import png | |
from . import pfm | |
import numpy as np | |
import matplotlib.colors as cl | |
import matplotlib.pyplot as plt | |
from PIL import Image | |
import cv2 | |
import pdb | |
# Flow components whose absolute value exceeds this are treated as "unknown"
# by flow_error() and flow_to_image() and are zeroed before further processing.
UNKNOWN_FLOW_THRESH = 1e7
# segment_flow() replaces horizontal components whose magnitude equals this
# value with a small epsilon before dividing by them.
SMALLFLOW = 0.0
# segment_flow() assigns class 0 to pixels whose |u| or |v| exceeds this value.
LARGEFLOW = 1e8
""" | |
============= | |
Flow Section | |
============= | |
""" | |
def point_vec(img, flow, skip=16):
    """
    Draw a sparse field of flow arrows on top of an image.

    :param img: H x W x 3 image; its channel order is reversed for the output
    :param flow: H x W x 3 array; channels 0/1 are the x/y displacement and
                 channel 2 must equal 1 for a pixel to be drawn
    :param skip: grid step (in pixels) between candidate arrow origins
    :return: the channel-reversed, possibly upscaled image with arrows drawn
    """
    maxsize = 256
    extendfac = 2.  # arrows are drawn at twice the flow length
    # Integer upscaling factor (never below 1) derived from maxsize and the
    # image dimensions.
    resize_factor = max(1, int(max(maxsize / img.shape[0], maxsize / img.shape[1])))
    dispimg = cv2.resize(img[:, :, ::-1].copy(), None, fx=resize_factor, fy=resize_factor)
    colorflow = flow_to_image(flow).astype(int)

    # Step straight over the sampling grid instead of visiting every pixel
    # and rejecting the ones that are not multiples of `skip`.
    for i in range(0, img.shape[1], skip):  # x
        for j in range(0, img.shape[0], skip):  # y
            if flow[j, i, 2] != 1:
                continue
            xend = int((i + extendfac * flow[j, i, 0]) * resize_factor)
            yend = int((j + extendfac * flow[j, i, 1]) * resize_factor)
            leng = np.linalg.norm(flow[j, i, :2] * extendfac)
            if leng < 3:
                continue
            # Plain Python ints for the coordinates and colour; the colour
            # channels are reversed to match the channel-reversed image.
            start = (int(i * resize_factor), int(j * resize_factor))
            color = (int(colorflow[j, i, 2]), int(colorflow[j, i, 1]), int(colorflow[j, i, 0]))
            dispimg = cv2.arrowedLine(dispimg, start, (xend, yend), color, 4,
                                      tipLength=2 / leng, line_type=cv2.LINE_AA)
    return dispimg
def show_flow(filename):
    """
    Load a flow file, colour-code it and display it in a matplotlib window.

    :param filename: path of the optical flow file (any format read_flow accepts)
    :return: None
    """
    plt.imshow(flow_to_image(read_flow(filename)))
    plt.show()
def visualize_flow(flow, mode='Y'):
    """
    Display a flow field with matplotlib.

    :param flow: H x W x C flow array; 'RGB' mode also reads channel 2 as a
                 validity mask
    :param mode: 'Y' renders through flow_to_image; 'RGB' builds an HSV image
                 from flow angle and magnitude and converts it to RGB. Any
                 other value draws nothing.
    :return: None
    """
    if mode == 'Y':
        img = flow_to_image(flow)
        plt.imshow(img)
        plt.show()
    elif mode == 'RGB':
        (h, w) = flow.shape[0:2]
        du = flow[:, :, 0]
        dv = flow[:, :, 1]
        valid = flow[:, :, 2]
        max_flow = max(np.max(du), np.max(dv))
        img = np.zeros((h, w, 3), dtype=np.float64)
        # angle layer
        img[:, :, 0] = np.arctan2(dv, du) / (2 * np.pi)
        # magnitude layer, scaled by 8 / max_flow
        img[:, :, 1] = np.sqrt(du * du + dv * dv) * 8 / max_flow
        # phase layer
        img[:, :, 2] = 8 - img[:, :, 1]
        # clip to [0,1]
        img = np.clip(img, 0, 1)
        # convert to rgb
        img = cl.hsv_to_rgb(img)
        # remove invalid points (a leftover pdb breakpoint used to sit here
        # and halted every call)
        img *= valid[:, :, np.newaxis]
        # show
        plt.imshow(img)
        plt.show()
    return None
def read_flow(filename):
    """
    read optical flow data from flow file

    The reader is chosen from the file extension, compared case-insensitively
    so that names such as ``FLOW.PNG`` are accepted.

    :param filename: name of the flow file (.flo, .png or .pfm)
    :return: optical flow data in numpy array
    :raises ValueError: if the extension is not one of the supported formats
    """
    lowered = filename.lower()
    if lowered.endswith('.flo'):
        return read_flo_file(filename)
    if lowered.endswith('.png'):
        return read_png_file(filename)
    if lowered.endswith('.pfm'):
        return read_pfm_file(filename)
    # ValueError is an Exception subclass, so existing handlers still match.
    raise ValueError('Invalid flow file format!')
def write_flow(flow, filename):
    """
    write optical flow in Middlebury .flo format

    The file consists of a float32 magic number, int32 width, int32 height
    and then the flow values. The values are always written as float32 and
    only the first two channels are stored, so float64 input or input with an
    extra third channel no longer produces a corrupt file.

    :param flow: optical flow map, H x W x C with u and v in channels 0 and 1
    :param filename: optical flow file path to be saved
    :return: None
    """
    data = np.asarray(flow)
    (height, width) = data.shape[0:2]
    if data.ndim == 3 and data.shape[2] > 2:
        data = data[:, :, :2]
    data = data.astype(np.float32)

    # The context manager closes the file even if a write fails.
    with open(filename, 'wb') as f:
        np.array([202021.25], dtype=np.float32).tofile(f)
        np.array([width], dtype=np.int32).tofile(f)
        np.array([height], dtype=np.int32).tofile(f)
        data.tofile(f)
def save_flow_image(flow, image_file):
    """
    Colour-code a flow field and save the visualization as an image file.

    :param flow: optical flow data
    :param image_file: path of the image file to write
    :return: None
    """
    Image.fromarray(flow_to_image(flow)).save(image_file)
def flowfile_to_imagefile(flow_file, image_file):
    """
    Read a flow file and save its colour-coded visualization as an image.

    :param flow_file: path of the optical flow file to read
    :param image_file: path of the image file to write
    :return: None
    """
    save_flow_image(read_flow(flow_file), image_file)
def segment_flow(flow):
    """
    Label every pixel with one of eight direction classes (1-8) or 0.

    Classes are chosen from the signs of u and v and from the ratio v / u.
    Pixels with exactly zero flow, or whose |u| or |v| exceeds LARGEFLOW,
    get class 0. The input array is not modified.

    :param flow: H x W x C flow array with u and v in channels 0 and 1
    :return: H x W float array of class labels
    """
    h, w = flow.shape[:2]
    # Floating-point copies: the epsilon substitution below must neither
    # leak into the caller's array nor be truncated by an integer dtype.
    u = np.array(flow[:, :, 0], dtype=np.float64)
    v = np.array(flow[:, :, 1], dtype=np.float64)

    too_large = (abs(u) > LARGEFLOW) | (abs(v) > LARGEFLOW)
    static = (v == 0) & (u == 0)

    # Avoid dividing by zero when forming the tangent.
    u[abs(u) == SMALLFLOW] = 0.00001
    tan_value = v / u

    # Masks listed in label order 1..8; later labels overwrite earlier ones
    # where masks overlap, as in the original assignment sequence.
    sectors = (
        (tan_value < 1) & (tan_value >= 0) & (u > 0) & (v >= 0),
        (tan_value >= 1) & (u >= 0) & (v >= 0),
        (tan_value < -1) & (u <= 0) & (v >= 0),
        (tan_value < 0) & (tan_value >= -1) & (u < 0) & (v >= 0),
        (tan_value >= 0) & (tan_value < 1) & (u < 0) & (v <= 0),
        (tan_value >= 1) & (u <= 0) & (v <= 0),
        (tan_value < -1) & (u >= 0) & (v <= 0),
        (tan_value >= -1) & (tan_value < 0) & (u > 0) & (v <= 0),
    )

    seg = np.zeros((h, w))
    for label, sector in enumerate(sectors, start=1):
        seg[sector] = label
    seg[static] = 0
    seg[too_large] = 0
    return seg
def flow_error(tu, tv, u, v):
    """
    Calculate average end point error

    Pixels whose ground-truth component exceeds UNKNOWN_FLOW_THRESH in
    magnitude are zeroed in all four maps, and only pixels whose remaining
    ground-truth flow is non-zero contribute to the average. The inputs are
    not modified.

    :param tu: ground-truth horizontal flow map
    :param tv: ground-truth vertical flow map
    :param u: estimated horizontal flow map
    :param v: estimated vertical flow map
    :return: mean end point error, or NaN when no pixel qualifies
    """
    smallflow = 0.0

    # Real copies: slicing with [:] only makes views, so zeroing them would
    # overwrite the caller's arrays.
    stu = np.array(tu)
    stv = np.array(tv)
    su = np.array(u)
    sv = np.array(v)

    unknown = (np.abs(stu) > UNKNOWN_FLOW_THRESH) | (np.abs(stv) > UNKNOWN_FLOW_THRESH)
    stu[unknown] = 0
    stv[unknown] = 0
    su[unknown] = 0
    sv[unknown] = 0

    # Plain boolean mask; wrapping it in a list is not accepted as an index
    # by current NumPy releases.
    moving = (np.abs(stu) > smallflow) | (np.abs(stv) > smallflow)

    epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2)[moving]
    if epe.size == 0:
        return np.nan
    return np.mean(epe)
def flow_to_image(flow):
    """
    Convert flow into middlebury color code image

    Flow vectors are normalised by the largest magnitude in the map before
    being colour coded. Unknown pixels (NaN, or a component larger in
    magnitude than UNKNOWN_FLOW_THRESH) are rendered black. The input array
    is not modified.

    :param flow: optical flow map, H x W x C with u and v in channels 0 and 1
    :return: optical flow image in middlebury color, H x W x 3 uint8
    """
    # Copies, so that zeroing unknown pixels does not alter the caller's flow.
    u = flow[:, :, 0].copy()
    v = flow[:, :, 1].copy()

    # NaN counts as unknown as well; otherwise one NaN pixel would make the
    # normalisation radius NaN and black out the whole image.
    idx_unknown = (np.isnan(u) | np.isnan(v)
                   | (np.abs(u) > UNKNOWN_FLOW_THRESH)
                   | (np.abs(v) > UNKNOWN_FLOW_THRESH))
    u[idx_unknown] = 0
    v[idx_unknown] = 0

    rad = np.sqrt(u ** 2 + v ** 2)
    # eps keeps the division finite for an all-zero flow map
    scale = np.max(rad) + np.finfo(float).eps

    img = compute_color(u / scale, v / scale)

    img[np.repeat(idx_unknown[:, :, np.newaxis], 3, axis=2)] = 0
    return np.uint8(img)
def evaluate_flow_file(gt_file, pred_file):
    """
    Compute the end point error between two flow files.

    :param gt_file: ground truth file path
    :param pred_file: estimated optical flow file path
    :return: value returned by flow_error for the two flow fields
    """
    reference = read_flow(gt_file)
    estimate = read_flow(pred_file)
    gt_u, gt_v = reference[:, :, 0], reference[:, :, 1]
    est_u, est_v = estimate[:, :, 0], estimate[:, :, 1]
    return flow_error(gt_u, gt_v, est_u, est_v)
def evaluate_flow(gt_flow, pred_flow):
    """
    Compute the end point error between two in-memory flow fields.

    :param gt_flow: ground-truth flow, u and v in channels 0 and 1
    :param pred_flow: estimated flow, u and v in channels 0 and 1
    :return: value returned by flow_error for the two flow fields
    """
    gt_u, gt_v = gt_flow[:, :, 0], gt_flow[:, :, 1]
    est_u, est_v = pred_flow[:, :, 0], pred_flow[:, :, 1]
    return flow_error(gt_u, gt_v, est_u, est_v)
""" | |
============== | |
Disparity Section | |
============== | |
""" | |
def read_disp_png(file_name):
    """
    Read a disparity map stored as a .png file

    :param file_name: name of the disparity file
    :return: first image channel divided by 256, as a float array (H x W)
    """
    image_object = png.Reader(filename=file_name)
    image_direct = image_object.asDirect()
    image_data = list(image_direct[2])
    (w, h) = image_direct[3]['size']
    # Integer division: a float channel count is rejected by np.zeros and
    # range() on Python 3.
    channel = len(image_data[0]) // w
    flow = np.zeros((h, w, channel), dtype=np.uint16)
    for i, row in enumerate(image_data):
        # each row interleaves the channels of successive pixels
        for j in range(channel):
            flow[i, :, j] = row[j::channel]
    return flow[:, :, 0] / 256.0
def disp_to_flowfile(disp, filename):
    """
    Write a disparity map as a Middlebury .flo file

    The disparity becomes the horizontal flow component and the vertical
    component is all zeros. Values are converted to float32 before writing.

    :param disp: disparity matrix
    :param filename: the flow file name to save
    :return: None
    """
    # Force float32: a float64 disparity would otherwise be written with
    # 8-byte values and corrupt the file.
    disp = np.asarray(disp, dtype=np.float32)
    (height, width) = disp.shape[0:2]
    empty_map = np.zeros((height, width), dtype=np.float32)
    data = np.dstack((disp, empty_map))

    # The context manager closes the file even if a write fails.
    with open(filename, 'wb') as f:
        np.array([202021.25], dtype=np.float32).tofile(f)
        np.array([width], dtype=np.int32).tofile(f)
        np.array([height], dtype=np.int32).tofile(f)
        data.tofile(f)
""" | |
============== | |
Image Section | |
============== | |
""" | |
def read_image(filename):
    """
    Open an image file with PIL and convert it to a NumPy array.

    :param filename: name of the image file
    :return: image data as a NumPy array
    """
    return np.array(Image.open(filename))
def warp_flow(img, flow):
    """
    Resample an image with cv2.remap at the positions given by a flow field.

    :param img: image to sample from
    :param flow: per-pixel displacement, x in channel 0 and y in channel 1
    :return: image sampled at (x + flow_x, y + flow_y) with linear interpolation
    """
    height, width = flow.shape[:2]
    # float32 copy of the flow, converted below to absolute sample coordinates
    sample_map = np.array(flow, dtype=np.float32, order='C')
    sample_map[:, :, 0] += np.arange(width)
    sample_map[:, :, 1] += np.arange(height)[:, np.newaxis]
    return cv2.remap(img, sample_map, None, cv2.INTER_LINEAR)
def warp_image(im, flow):
    """
    Use optical flow to warp image to the next

    Every channel is resampled with cubic scattered-data interpolation at the
    positions (x + flow_x, y + flow_y). Positions outside the pixel grid are
    set to 1 in the result.

    :param im: image to warp, H x W x C
    :param flow: optical flow with the same height and width as im
    :return: warped image, uint8
    """
    from scipy import interpolate
    image_height = im.shape[0]
    image_width = im.shape[1]
    flow_height = flow.shape[0]
    flow_width = flow.shape[1]
    n = image_height * image_width
    (iy, ix) = np.mgrid[0:image_height, 0:image_width]
    (fy, fx) = np.mgrid[0:flow_height, 0:flow_width]
    fx = fx.astype(np.float64)
    fy = fy.astype(np.float64)
    fx += flow[:, :, 0]
    fy += flow[:, :, 1]

    # The last valid sample coordinate is size - 1; anything beyond it lies
    # outside the data grid and cannot be interpolated.
    max_x = flow_width - 1
    max_y = flow_height - 1
    mask = (fx < 0) | (fx > max_x) | (fy < 0) | (fy > max_y)
    fx = np.clip(fx, 0, max_x)
    fy = np.clip(fy, 0, max_y)

    points = np.concatenate((ix.reshape(n, 1), iy.reshape(n, 1)), axis=1)
    xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n, 1)), axis=1)
    warp = np.zeros((image_height, image_width, im.shape[2]), dtype=np.uint8)
    for i in range(im.shape[2]):
        values = im[:, :, i].reshape(n, 1)
        new_channel = interpolate.griddata(points, values, xi, method='cubic')
        new_channel = np.reshape(new_channel, [flow_height, flow_width])
        new_channel[mask] = 1
        # Round and clamp before the cast: cubic interpolation can overshoot
        # the 0..255 range, and a bare uint8 cast truncates and wraps.
        warp[:, :, i] = np.clip(np.rint(new_channel), 0, 255).astype(np.uint8)
    return warp
""" | |
============== | |
Others | |
============== | |
""" | |
def pfm_to_flo(pfm_file):
    """
    Convert a .pfm flow file into a .flo file stored next to it.

    :param pfm_file: path of the .pfm file; the output uses the same path
                     with its extension replaced by '.flo'
    :return: None
    """
    import os

    # splitext replaces only the final extension. str.find('.pfm') returned
    # -1 for names without that exact suffix (dropping the last character)
    # and matched too early when '.pfm' appeared earlier in the path.
    flow_filename = os.path.splitext(pfm_file)[0] + '.flo'
    (data, _scale) = pfm.readPFM(pfm_file)
    write_flow(data[:, :, 0:2], flow_filename)
def scale_image(image, new_range):
    """
    Linearly scale the image into desired range

    :param image: input image
    :param new_range: pair of values; its min and max bound the output range
    :return: image mapped linearly onto the new range, cast to uint8. A
             constant image maps to the lower bound of the range.
    """
    min_val = np.min(image).astype(np.float32)
    max_val = np.max(image).astype(np.float32)
    min_val_new = np.array(min(new_range), dtype=np.float32)
    max_val_new = np.array(max(new_range), dtype=np.float32)

    span = max_val - min_val
    if span == 0:
        # A constant image has no dynamic range; dividing by the zero span
        # would yield NaN and an undefined uint8 cast.
        return np.full(np.shape(image), min_val_new).astype(np.uint8)

    scaled_image = (image - min_val) / span * (max_val_new - min_val_new) + min_val_new
    return scaled_image.astype(np.uint8)
def compute_color(u, v):
    """
    compute optical flow color map

    The flow angle selects a position on the colour wheel (linearly
    interpolated between neighbouring entries). For magnitudes up to 1 the
    colour is blended towards white as the magnitude shrinks; larger
    magnitudes are dimmed by 0.75. Pixels where u or v is NaN are black.
    The input arrays are not modified.

    :param u: optical flow horizontal map (2-D)
    :param v: optical flow vertical map (2-D)
    :return: optical flow in color code, H x W x 3 float array holding
             integer values in 0..255
    """
    [h, w] = u.shape
    img = np.zeros([h, w, 3])

    # Copies, so that zeroing NaN entries does not alter the caller's arrays.
    u = np.array(u)
    v = np.array(v)
    nan_idx = np.isnan(u) | np.isnan(v)
    u[nan_idx] = 0
    v[nan_idx] = 0

    colorwheel = make_color_wheel()
    ncols = np.size(colorwheel, 0)

    rad = np.sqrt(u ** 2 + v ** 2)
    a = np.arctan2(-v, -u) / np.pi
    # fk is a 1-based fractional index into the wheel
    fk = (a + 1) / 2 * (ncols - 1) + 1
    k0 = np.floor(fk).astype(int)
    k1 = k0 + 1
    k1[k1 == ncols + 1] = 1  # wrap around past the last wheel entry
    f = fk - k0

    # Loop-invariant masks, computed once instead of once per channel.
    inside = rad <= 1
    outside = np.logical_not(inside)
    keep = 1 - nan_idx

    for i in range(np.size(colorwheel, 1)):
        tmp = colorwheel[:, i]
        col0 = tmp[k0 - 1] / 255
        col1 = tmp[k1 - 1] / 255
        col = (1 - f) * col0 + f * col1
        col[inside] = 1 - rad[inside] * (1 - col[inside])
        col[outside] *= 0.75
        img[:, :, i] = np.uint8(np.floor(255 * col * keep))
    return img
def make_color_wheel():
    """
    Build the colour wheel table used for flow colour coding.

    The wheel is a sequence of six colour transitions. Within each one a
    single channel stays at 255 while another ramps linearly up or down.

    :return: array of shape (55, 3) with channel values in 0..255
    """
    # (segment length, channel held at 255, ramping channel, ramp rises?)
    transitions = (
        (15, 0, 1, True),    # RY
        (6, 1, 0, False),    # YG
        (4, 1, 2, True),     # GC
        (11, 2, 1, False),   # CB
        (13, 2, 0, True),    # BM
        (6, 0, 2, False),    # MR
    )
    total = sum(length for length, _, _, _ in transitions)
    wheel = np.zeros([total, 3])

    start = 0
    for length, held, ramped, rising in transitions:
        stop = start + length
        ramp = np.floor(255 * np.arange(0, length) / length)
        wheel[start:stop, held] = 255
        wheel[start:stop, ramped] = ramp if rising else 255 - ramp
        start = stop
    return wheel
def read_flo_file(filename):
    """
    Read from Middlebury .flo file

    :param filename: name of the flow file
    :return: H x W x 3 float array; channels 0 and 1 hold the flow read from
             the file and channel 2 is filled with ones
    :raises ValueError: if the magic number is wrong or the file is truncated
    """
    # The context manager closes the file on every path, including errors.
    with open(filename, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)
        if magic.size != 1 or magic[0] != np.float32(202021.25):
            # Previously this case only printed the message and then failed
            # with UnboundLocalError at the return statement.
            raise ValueError('Magic number incorrect. Invalid .flo file')
        w = np.fromfile(f, np.int32, count=1)
        h = np.fromfile(f, np.int32, count=1)
        if w.size != 1 or h.size != 1 or w[0] < 0 or h[0] < 0:
            raise ValueError('Invalid .flo file: missing or negative dimensions')
        width, height = int(w[0]), int(h[0])
        data2d = np.fromfile(f, np.float32, count=2 * width * height)

    # Reject short files explicitly; np.resize would silently repeat the
    # available values to fill the requested shape.
    if data2d.size != 2 * width * height:
        raise ValueError('Invalid .flo file: flow data is truncated')

    flow = np.ones((height, width, 3))
    flow[:, :, :2] = data2d.reshape(height, width, 2)
    return flow
def read_png_file(flow_file):
    """
    Read flow stored in a 3-channel .png file

    The stored values are decoded as (value - 2**15) / 64. Pixels whose third
    channel is 0 are treated as invalid and get zero flow.

    :param flow_file: name of the flow file
    :return: H x W x 3 float64 array; channels 0 and 1 are the decoded flow
             and channel 2 is the stored third channel
    :raises IOError: if the file cannot be read as an image
    """
    # -1 loads the image unchanged (original bit depth and channels)
    raw = cv2.imread(flow_file, -1)
    if raw is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('Cannot read flow image: %s' % flow_file)
    # reverse the channel order delivered by OpenCV
    flow = raw[:, :, ::-1].astype(np.float64)

    invalid_idx = (flow[:, :, 2] == 0)
    flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0
    flow[invalid_idx, 0] = 0
    flow[invalid_idx, 1] = 0
    return flow
def read_pfm_file(flow_file):
    """
    Load the data array of a .pfm file.

    :param flow_file: name of the flow file
    :return: data returned by pfm.readPFM (the accompanying scale is dropped)
    """
    contents = pfm.readPFM(flow_file)
    data, _ = contents
    return data
# fast resample layer | |
def resample(img, sz):
    """
    Bilinearly resample a flow map to a new size and rescale its vectors.

    Channel 0 is multiplied by out_width / in_width and channel 1 by
    out_height / in_height.

    :param img: flow map to be resampled, H x W x C with at least 2 channels
    :param sz: new flow map size, [height, width]
    :return: resampled flow map of shape (sz[0], sz[1], 2)
    """
    in_height = img.shape[0]
    in_width = img.shape[1]
    out_height = sz[0]
    out_width = sz[1]
    out_flow = np.zeros((out_height, out_width, 2))

    # find scale
    height_scale = float(in_height) / float(out_height)
    width_scale = float(in_width) / float(out_width)

    [x, y] = np.meshgrid(range(out_width), range(out_height))
    xx = x * width_scale
    yy = y * height_scale
    x_floor = np.floor(xx)
    y_floor = np.floor(yy)

    # Fractional offsets come from the unclipped floor. Deriving the weights
    # from the clipped neighbour indices made them sum to zero wherever the
    # right/bottom neighbour was clamped, which zeroed the border samples.
    fx = xx - x_floor
    fy = yy - y_floor

    x0 = np.clip(x_floor.astype(np.int32), 0, in_width - 1)
    x1 = np.clip(x0 + 1, 0, in_width - 1)
    y0 = np.clip(y_floor.astype(np.int32), 0, in_height - 1)
    y1 = np.clip(y0 + 1, 0, in_height - 1)

    Ia = img[y0, x0, :]
    Ib = img[y1, x0, :]
    Ic = img[y0, x1, :]
    Id = img[y1, x1, :]

    wa = (1 - fy) * (1 - fx)
    wb = fy * (1 - fx)
    wc = (1 - fy) * fx
    wd = fy * fx

    out_flow[:, :, 0] = (Ia[:, :, 0] * wa + Ib[:, :, 0] * wb + Ic[:, :, 0] * wc + Id[:, :, 0] * wd) * out_width / in_width
    out_flow[:, :, 1] = (Ia[:, :, 1] * wa + Ib[:, :, 1] * wb + Ic[:, :, 1] * wc + Id[:, :, 1] * wd) * out_height / in_height
    return out_flow