Spaces:

radames
/

UserControllableLT-Latent-Transformer

Runtime error

UserControllableLT-Latent-Transformer / expansion /utils /util_flow.py

endo-yuki-t

initial commit

d7dbcdd almost 3 years ago

7.87 kB

	import math
	import png
	import struct
	import array
	import numpy as np
	import cv2
	import pdb

	from io import *

	# Flow components whose magnitude reaches the threshold are treated as
	# "unknown" when reading; UNKNOWN_FLOW is the value written for masked pixels.
	UNKNOWN_FLOW_THRESH = 1e9
	UNKNOWN_FLOW = 1e10

	# Middlebury .flo sanity-check tag.
	TAG_STRING = 'PIEH'    # bytes emitted at the start of a file when WRITING
	TAG_FLOAT = 202021.25  # value the leading float must equal when READING

	def readPFM(file):
	    """Read a PFM image from disk.

	    Args:
	        file: path of the PFM file to open.

	    Returns:
	        Tuple ``(data, scale)``. ``data`` is a float array of shape
	        (height, width, 3) for a 'PF' header or (height, width) for a 'Pf'
	        header, flipped vertically relative to the order stored in the file.
	        ``scale`` is the absolute value of the scale field in the header.

	    Raises:
	        Exception: if the magic line is neither 'PF' nor 'Pf', or the
	            dimension line is malformed.
	    """
	    import re

	    # The context manager guarantees the handle is released even when the
	    # header turns out to be invalid.
	    with open(file, 'rb') as handle:
	        header = handle.readline().rstrip()
	        if header == b'PF':
	            color = True
	        elif header == b'Pf':
	            color = False
	        else:
	            raise Exception('Not a PFM file.')

	        # Raw bytes literal: '\d' and '\s' are regex escapes, not string escapes.
	        dim_match = re.match(rb'^(\d+)\s(\d+)\s$', handle.readline())
	        if dim_match:
	            width, height = map(int, dim_match.groups())
	        else:
	            raise Exception('Malformed PFM header.')

	        # The sign of the scale field encodes the byte order of the payload.
	        scale = float(handle.readline().rstrip())
	        if scale < 0:  # little-endian
	            endian = '<'
	            scale = -scale
	        else:
	            endian = '>'  # big-endian

	        data = np.fromfile(handle, endian + 'f')

	    shape = (height, width, 3) if color else (height, width)
	    data = np.reshape(data, shape)
	    data = np.flipud(data)
	    return data, scale


	def save_pfm(file, image, scale = 1):
	    """Write a float32 image to an already-open file in PFM format.

	    Args:
	        file: open, writable file object. Both binary-mode and text-mode
	            handles are accepted; the header is encoded to bytes for
	            binary handles.
	        image: float32 array of shape H x W x 3 (written with a 'PF' header),
	            or H x W / H x W x 1 (written with a 'Pf' header).
	        scale: scale value stored in the header. It is negated when the pixel
	            data is little-endian.

	    Raises:
	        Exception: if the dtype is not float32 or the shape is unsupported.

	    Note:
	        Rows are written in array order; no vertical flip is applied here.
	    """
	    import io
	    import sys

	    if image.dtype.name != 'float32':
	        raise Exception('Image dtype must be float32.')

	    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
	        color = True
	    elif len(image.shape) == 2 or (len(image.shape) == 3 and image.shape[2] == 1):  # greyscale
	        color = False
	    else:
	        raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')

	    # A negative scale marks little-endian pixel data.
	    endian = image.dtype.byteorder
	    if endian == '<' or (endian == '=' and sys.byteorder == 'little'):
	        scale = -scale

	    header = ('PF\n' if color else 'Pf\n')
	    header += '%d %d\n' % (image.shape[1], image.shape[0])
	    header += '%f\n' % scale

	    # Binary handles reject str in Python 3, so encode the header for them.
	    if isinstance(file, io.TextIOBase):
	        file.write(header)
	    else:
	        file.write(header.encode('ascii'))

	    image.tofile(file)


	def ReadMiddleburyFloFile(path):
	    """ Read .FLO file as specified by Middlebury.

	    The file starts with a float tag, followed by the integer width and
	    height, followed by width*height interleaved (u, v) float pairs. All
	    values are decoded in the machine's native byte order.

	    Returns tuple (width, height, u, v, mask), where u, v, mask are flat
	    lists of values. mask[i] is True when both |u[i]| and |v[i]| are below
	    UNKNOWN_FLOW_THRESH; u and v are set to 0 wherever mask is False.

	    Raises AssertionError if the leading tag does not equal TAG_FLOAT.
	    """

	    with open(path, 'rb') as fil:
	        tag = struct.unpack('f', fil.read(4))[0]
	        width = struct.unpack('i', fil.read(4))[0]
	        height = struct.unpack('i', fil.read(4))[0]

	        assert tag == TAG_FLOAT

	        # Two floats (u, v) per pixel, four bytes each.
	        count = width * height * 2
	        data = struct.unpack('f' * count, fil.read(4 * count))

	    # Samples are interleaved as u0, v0, u1, v1, ...
	    u = data[::2]
	    v = data[1::2]

	    mask = [abs(x) < UNKNOWN_FLOW_THRESH and abs(y) < UNKNOWN_FLOW_THRESH
	            for x, y in zip(u, v)]
	    u_masked = [x if valid else 0 for x, valid in zip(u, mask)]
	    v_masked = [y if valid else 0 for y, valid in zip(v, mask)]

	    return width, height, u_masked, v_masked, mask

	def ReadKittiPngFile(path):
	    """ Read 16-bit .PNG file as specified by KITTI-2015 (flow).

	    Each pixel holds three 16-bit samples. The first two are decoded as
	    (sample - 2**15) / 64.0 to give u and v; the third is copied unchanged
	    into mask.

	    Returns a tuple, (width, height, u, v, mask), where u, v, mask
	    are flat float arrays (array.array('f')) in row-major order.

	    Raises Exception if the PNG bit depth is not 16.
	    """
	    # Read .png file.
	    png_reader = png.Reader(path)
	    data = png_reader.read()
	    if data[3]['bitdepth'] != 16:
	        raise Exception('bitdepth of ' + path + ' is not 16')

	    width = data[0]
	    height = data[1]

	    # Get list of rows.
	    rows = list(data[2])

	    u = array.array('f', [0]) * width*height
	    v = array.array('f', [0]) * width*height
	    mask = array.array('f', [0]) * width*height

	    for y, row in enumerate(rows):
	        for x in range(width):
	            ind = width*y + x
	            u[ind] = (row[3*x] - 2**15) / 64.0
	            v[ind] = (row[3*x+1] - 2**15) / 64.0
	            mask[ind] = row[3*x+2]

	    return (width, height, u, v, mask)


	def WriteMiddleburyFloFile(path, width, height, u, v, mask=None):
	    """ Write .FLO file as specified by Middlebury.

	    path is the output file; width and height are the image dimensions;
	    u and v are flat sequences of width*height flow components. When mask
	    is given, entries whose mask value is falsy are written as UNKNOWN_FLOW.

	    The file holds TAG_STRING, then width and height as integers, then the
	    interleaved (u, v) floats, all packed in native byte order.
	    struct.error is raised if the number of samples is not width*height*2.
	    """

	    if mask is not None:
	        u_masked = [x if valid else UNKNOWN_FLOW for x, valid in zip(u, mask)]
	        v_masked = [y if valid else UNKNOWN_FLOW for y, valid in zip(v, mask)]
	    else:
	        u_masked = u
	        v_masked = v

	    fmt = 'f' * (width * height * 2)
	    # Interleave lists
	    data = [x for t in zip(u_masked, v_masked) for x in t]

	    with open(path, 'wb') as fil:
	        fil.write(str.encode(TAG_STRING))
	        fil.write(struct.pack('i', width))
	        fil.write(struct.pack('i', height))
	        fil.write(struct.pack(fmt, *data))


	def write_flow(path,flow):
	    """Write a flow array to `path` as a 16-bit image via cv2.imwrite.

	    flow is indexed as flow[row, col, channel] with at least three channels:
	    channels 0 and 1 are the flow components and channel 2 is a validity
	    flag (0 marks an invalid pixel). The components are encoded as
	    value*64 + 2**15, invalid pixels are zeroed, and the result is cast to
	    uint16.

	    The input array is left untouched; encoding happens on a copy.
	    """
	    # Work on a copy so the caller's data is not scaled in place.
	    flow = np.array(flow, copy=True)

	    invalid_idx = (flow[:, :, 2] == 0)
	    flow[:, :, 0:2] = flow[:, :, 0:2]*64. + 2**15
	    flow[invalid_idx, 0] = 0
	    flow[invalid_idx, 1] = 0

	    flow = flow.astype(np.uint16)
	    # Channel order is reversed before writing -- presumably because
	    # cv2.imwrite expects BGR ordering; confirm against the reader.
	    cv2.imwrite(path, flow[:, :, ::-1])



	def WriteKittiPngFile(path, width, height, u, v, mask=None):
	    """ Write 16-bit .PNG file as specified by KITTI-2015 (flow).

	    u, v are lists of float values
	    mask is a list of floats, denoting the valid pixels. When mask is None
	    every pixel is written with a mask value of 1.

	    Each pixel is stored as three unsigned 16-bit samples:
	    int(u*64 + 2**15), int(v*64 + 2**15) and int(mask).
	    """
	    if mask is None:
	        # The default used to crash inside zip(); treat it as "all valid".
	        mask = [1] * (width * height)

	    data = array.array('H', [0]) * width*height*3

	    for i, (u_, v_, mask_) in enumerate(zip(u, v, mask)):
	        data[3*i] = int(u_*64.0 + 2**15)
	        data[3*i+1] = int(v_*64.0 + 2**15)
	        data[3*i+2] = int(mask_)

	    with open(path, 'wb') as png_file:
	        png_writer = png.Writer(width=width, height=height, bitdepth=16, compression=3, greyscale=False)
	        png_writer.write_array(png_file, data)


	def ConvertMiddleburyFloToKittiPng(src_path, dest_path):
	    """Read the Middlebury .flo file at src_path and write it to dest_path
	    as a KITTI flow .png, carrying the validity mask across."""
	    flo_contents = ReadMiddleburyFloFile(src_path)
	    width, height, u, v, valid = flo_contents
	    WriteKittiPngFile(dest_path, width, height, u, v, mask=valid)

	def ConvertKittiPngToMiddleburyFlo(src_path, dest_path):
	    """Read the KITTI flow .png at src_path and write it to dest_path as a
	    Middlebury .flo file, carrying the validity mask across."""
	    png_contents = ReadKittiPngFile(src_path)
	    width, height, u, v, valid = png_contents
	    WriteMiddleburyFloFile(dest_path, width, height, u, v, mask=valid)


	def ParseFilenameKitti(filename):
	    """Split a KITTI-style filename (seq_frameno.ext) into its parts.

	    Returns (seq, frameno, ext): seq is everything before the last
	    underscore (so it may include a dataset-name prefix), frameno is the
	    integer between the last underscore and the last dot, and ext is the
	    extension including the dot.
	    """
	    last_dot = filename.rfind('.')
	    last_underscore = filename.rfind('_')

	    seq = filename[:last_underscore]
	    frameno = int(filename[last_underscore + 1:last_dot])
	    ext = filename[last_dot:]
	    return seq, frameno, ext


	def read_calib_file(filepath):
	    """Read in a calibration file and parse into a dictionary.

	    Each non-blank line is expected to look like ``key: v1 v2 ...``. Values
	    are parsed into a 1-D float array stored under ``key``. Lines whose
	    values are not all floats are skipped, and blank lines are ignored.

	    Raises ValueError for a non-blank line that contains no ':'.
	    """
	    data = {}

	    with open(filepath, 'r') as f:
	        for line in f:
	            # A blank (e.g. trailing) line has no ':' and would otherwise
	            # make the unpacking below fail.
	            if not line.strip():
	                continue
	            key, value = line.split(':', 1)
	            # The only non-float values in these files are dates, which
	            # we don't care about anyway
	            try:
	                data[key] = np.array([float(x) for x in value.split()])
	            except ValueError:
	                pass

	    return data

	def load_calib_cam_to_cam(cam_to_cam_file):
	    """Load a cam-to-cam calibration file and derive per-camera quantities.

	    Returns a dictionary holding, for each of the four cameras, the left
	    3x3 block of its 3x4 rectified projection matrix ('K_cam0'..'K_cam3')
	    and the ratio P[0, 3] / P[0, 0] of that matrix ('b00'..'b30').
	    """
	    calib = read_calib_file(cam_to_cam_file)

	    # (projection key in the file, intrinsics key, ratio key) per camera.
	    key_table = (
	        ('P_rect_00', 'K_cam0', 'b00'),
	        ('P_rect_01', 'K_cam1', 'b10'),
	        ('P_rect_02', 'K_cam2', 'b20'),
	        ('P_rect_03', 'K_cam3', 'b30'),
	    )

	    # Create 3x4 projection matrices
	    projections = [np.reshape(calib[src_key], (3, 4)) for src_key, _, _ in key_table]

	    result = {}

	    # Camera intrinsics: the left 3x3 block of each projection matrix.
	    for (_, k_key, _), proj in zip(key_table, projections):
	        result[k_key] = proj[0:3, 0:3]

	    # Fourth-column x entry divided by the first diagonal entry.
	    for (_, _, b_key), proj in zip(key_table, projections):
	        result[b_key] = proj[0, 3] / proj[0, 0]

	    return result