import os
import numbers
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
import random
from PIL import Image, ImageOps
import numpy as np
import torchvision
from . import depth_transforms as flow_transforms
import pdb
import cv2
from utils.flowlib import read_flow
from utils.util_flow import readPFM, load_calib_cam_to_cam


def default_loader(path):
    # load an image as RGB
    return Image.open(path).convert('RGB')


def flow_loader(path):
    # .pfm flow files carry no validity channel, so mark every pixel as valid
    if '.pfm' in path:
        data = readPFM(path)[0]
        data[:, :, 2] = 1
        return data
    else:
        return read_flow(path)


def load_exts(cam_file):
    # parse camera extrinsics: lines prefixed with 'L ' / 'R ' carry 16 values
    # that form a 4x4 matrix for the left / right camera
    with open(cam_file, 'r') as f:
        lines = f.readlines()

    l_exts = []
    r_exts = []
    for l in lines:
        if 'L ' in l:
            l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4, 4))
        if 'R ' in l:
            r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4, 4))
    return l_exts, r_exts


def disparity_loader(path):
    if '.png' in path:
        data = Image.open(path)
        data = np.ascontiguousarray(data, dtype=np.float32) / 256
        return data
    else:
        return readPFM(path)[0]
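
# Note (added): KITTI-style .png disparity/depth maps store uint16 values scaled by 256,
# with 0 marking invalid pixels, hence the division by 256 above; .pfm files already
# contain floating-point disparities and are returned as-is.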


# triangulation: back-project pixels with disparity into homogeneous 3D points
def triangulation(disp, xcoord, ycoord, bl=1, fl=450, cx=479.5, cy=269.5):
    depth = bl * fl / disp  # 450px -> 15mm focal length
    X = (xcoord - cx) * depth / fl
    Y = (ycoord - cy) * depth / fl
    Z = depth
    P = np.concatenate((X[np.newaxis], Y[np.newaxis], Z[np.newaxis]), 0).reshape(3, -1)
    P = np.concatenate((P, np.ones((1, P.shape[-1]))), 0)  # 4 x N homogeneous coordinates
    return P
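
# Worked example (illustrative, not used by the pipeline): with the defaults bl=1, fl=450,
# cx=479.5, cy=269.5, a pixel at (xcoord, ycoord) = (579.5, 369.5) with disparity 5 gives
#   depth = 1 * 450 / 5 = 90
#   X = (579.5 - 479.5) * 90 / 450 = 20,  Y = (369.5 - 269.5) * 90 / 450 = 20
# so the corresponding column of P is the homogeneous point [20, 20, 90, 1].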


class myImageFloder(data.Dataset):
    def __init__(self, iml0, iml1, flowl0, loader=default_loader, dploader=flow_loader,
                 scale=1., shape=[320, 448], order=1, noise=0.06, pca_augmentor=True,
                 prob=1., sc=False, disp0=None, disp1=None, calib=None):
        self.iml0 = iml0
        self.iml1 = iml1
        self.flowl0 = flowl0
        self.loader = loader
        self.dploader = dploader
        self.scale = scale
        self.shape = shape
        self.order = order
        self.noise = noise
        self.pca_augmentor = pca_augmentor
        self.prob = prob
        self.sc = sc  # True: load ground-truth flow + disparities; False: depth-only mode
        self.disp0 = disp0
        self.disp1 = disp1
        self.calib = calib

    def __getitem__(self, index):
        iml0 = self.iml0[index]
        iml1 = self.iml1[index]
        flowl0 = self.flowl0[index]
        th, tw = self.shape

        iml0 = self.loader(iml0)
        iml1 = self.loader(iml1)

        # get disparity
        if self.sc:
            flowl0 = self.dploader(flowl0)
            flowl0 = np.ascontiguousarray(flowl0, dtype=np.float32)
            flowl0[np.isnan(flowl0)] = 1e6  # set to max

            if 'camera_data.txt' in self.calib[index]:
                # Scene Flow style calibration: fixed intrinsics, 15mm or 35mm lens
                bl = 1
                if '15mm_' in self.calib[index]:
                    fl = 450
                else:
                    fl = 1050
                cx = 479.5
                cy = 269.5
                # negative disp
                d1 = np.abs(disparity_loader(self.disp0[index]))
                d2 = np.abs(disparity_loader(self.disp1[index]) + d1)
            elif 'Sintel' in self.calib[index]:
                fl = 1000
                bl = 1
                cx = 511.5
                cy = 217.5
                d1 = np.zeros(flowl0.shape[:2])
                d2 = np.zeros(flowl0.shape[:2])
            else:
                # KITTI: read intrinsics and baseline from calib_cam_to_cam.txt
                ints = load_calib_cam_to_cam(self.calib[index])
                fl = ints['K_cam2'][0, 0]
                cx = ints['K_cam2'][0, 2]
                cy = ints['K_cam2'][1, 2]
                bl = ints['b20'] - ints['b30']
                d1 = disparity_loader(self.disp0[index])
                d2 = disparity_loader(self.disp1[index])

            # valid where the flow is valid and both disparities are non-zero
            #flowl0[:,:,2] = (flowl0[:,:,2]==1).astype(float)
            flowl0[:, :, 2] = np.logical_and(np.logical_and(flowl0[:, :, 2] == 1, d1 != 0), d2 != 0).astype(float)

            shape = d1.shape
            mesh = np.meshgrid(range(shape[1]), range(shape[0]))
            xcoord = mesh[0].astype(float)
            ycoord = mesh[1].astype(float)
            # triangulation in two frames
            P0 = triangulation(d1, xcoord, ycoord, bl=bl, fl=fl, cx=cx, cy=cy)
            P1 = triangulation(d2, xcoord + flowl0[:, :, 0], ycoord + flowl0[:, :, 1], bl=bl, fl=fl, cx=cx, cy=cy)
            dis0 = P0[2]
            dis1 = P1[2]

            # pack per-pixel targets: frame-0 depth, the two disparities, and 3D flow (7 channels)
            change_size = dis0.reshape(shape).astype(np.float32)
            flow3d = (P1 - P0)[:3].reshape((3,) + shape).transpose((1, 2, 0))
            gt_normal = np.concatenate((d1[:, :, np.newaxis], d2[:, :, np.newaxis], d2[:, :, np.newaxis]), -1)
            change_size = np.concatenate((change_size[:, :, np.newaxis], gt_normal, flow3d), 2)
        else:
            # depth-only mode: dummy flow, depth read from the ground-truth path
            shape = iml0.size
            shape = [shape[1], shape[0]]
            flowl0 = np.zeros((shape[0], shape[1], 3))
            change_size = np.zeros((shape[0], shape[1], 7))
            depth = disparity_loader(self.iml1[index].replace('camera', 'groundtruth'))
            change_size[:, :, 0] = depth
            seqid = self.iml0[index].split('/')[-5].rsplit('_', 3)[0]
            # hard-coded calibration path from the original training setup
            ints = load_calib_cam_to_cam('/data/gengshay/KITTI/%s/calib_cam_to_cam.txt' % seqid)
            fl = ints['K_cam2'][0, 0]
            cx = ints['K_cam2'][0, 2]
            cy = ints['K_cam2'][1, 2]
            bl = ints['b20'] - ints['b30']

        # to BGR float images in [0, 1]
        iml1 = np.asarray(iml1) / 255.
        iml0 = np.asarray(iml0) / 255.
        iml0 = iml0[:, :, ::-1].copy()
        iml1 = iml1[:, :, ::-1].copy()
        ## following the data augmentation procedure in PWC-Net:
        ## https://github.com/lmb-freiburg/flownet2/blob/master/src/caffe/layers/data_augmentation_layer.cu
        import __main__  # a workaround for "discount_coeff"
        # read the current iteration count (if available) from a hard-coded scratch file
        try:
            with open('/scratch/gengshay/iter_counts-%d.txt' % int(__main__.args.logname.split('-')[-1]), 'r') as f:
                iter_counts = int(f.readline())
        except Exception:
            iter_counts = 0
        # ramp the augmentation strength with a sigmoid-like schedule over training iterations;
        # 1.0986 ~= ln(3), so the coefficient is halfway to its final value after `half life` iterations
        schedule = [0.5, 1., 50000.]  # initial coeff, final coeff, half life
        schedule_coeff = schedule[0] + (schedule[1] - schedule[0]) * \
            (2 / (1 + np.exp(-1.0986 * iter_counts / schedule[2])) - 1)

        if self.pca_augmentor:
            pca_augmentor = flow_transforms.pseudoPCAAug(schedule_coeff=schedule_coeff)
        else:
            pca_augmentor = flow_transforms.Scale(1., order=0)

        # geometric augmentation (or a plain random crop), followed by photometric augmentation
        if np.random.binomial(1, self.prob):
            co_transform1 = flow_transforms.Compose([
                flow_transforms.SpatialAug([th, tw],
                                           scale=[0.2, 0., 0.1],
                                           rot=[0.4, 0.],
                                           trans=[0.4, 0.],
                                           squeeze=[0.3, 0.],
                                           schedule_coeff=schedule_coeff, order=self.order),
            ])
        else:
            co_transform1 = flow_transforms.Compose([
                flow_transforms.RandomCrop([th, tw]),
            ])

        co_transform2 = flow_transforms.Compose([
            flow_transforms.pseudoPCAAug(schedule_coeff=schedule_coeff),
            #flow_transforms.PCAAug(schedule_coeff=schedule_coeff),
            flow_transforms.ChromaticAug(schedule_coeff=schedule_coeff, noise=self.noise),
        ])
        # append the 7-channel targets to the 3-channel flow so both are augmented together
        flowl0 = np.concatenate([flowl0, change_size], -1)
        augmented, flowl0, intr = co_transform1([iml0, iml1], flowl0, [fl, cx, cy, bl])
        imol0 = augmented[0]
        imol1 = augmented[1]
        augmented, flowl0, intr = co_transform2(augmented, flowl0, intr)

        iml0 = augmented[0]
        iml1 = augmented[1]
        flowl0 = flowl0.astype(np.float32)
        change_size = flowl0[:, :, 3:]
        flowl0 = flowl0[:, :, :3]

        # randomly cover a region of the second frame with its mean color
        sx, sy, cx, cy = 0, 0, 0, 0
        if np.random.binomial(1, 0.5):
            sx = int(np.random.uniform(25, 100))
            sy = int(np.random.uniform(25, 100))
            #sx = int(np.random.uniform(50,150))
            #sy = int(np.random.uniform(50,150))
            cx = int(np.random.uniform(sx, iml1.shape[0] - sx))
            cy = int(np.random.uniform(sy, iml1.shape[1] - sy))
            iml1[cx - sx:cx + sx, cy - sy:cy + sy] = np.mean(np.mean(iml1, 0), 0)[np.newaxis, np.newaxis]

        iml0 = torch.Tensor(np.transpose(iml0, (2, 0, 1)))
        iml1 = torch.Tensor(np.transpose(iml1, (2, 0, 1)))
        return iml0, iml1, flowl0, change_size, intr, imol0, imol1, np.asarray([cx - sx, cx + sx, cy - sy, cy + sy])

    def __len__(self):
        return len(self.iml0)
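

# Illustrative sketch (added; not part of the original pipeline): how this dataset can be
# wrapped in a torch DataLoader. The argument names below are hypothetical placeholders;
# a real training script builds the frame-0/frame-1 image lists, flow lists, and matching
# disp0/disp1/calib lists from the dataset folders before constructing the dataset.
def _example_loader(frame0_images, frame1_images, flow_paths,
                    disp0_paths, disp1_paths, calib_paths, batch_size=4):
    dataset = myImageFloder(frame0_images, frame1_images, flow_paths,
                            shape=[320, 448], sc=True,
                            disp0=disp0_paths, disp1=disp1_paths, calib=calib_paths)
    # each sample yields: images 0/1, 2D flow + validity, 7-channel targets, intrinsics,
    # the pre-photometric-augmentation images, and the covered-region box
    return data.DataLoader(dataset, batch_size=batch_size,
                           shuffle=True, num_workers=4, drop_last=True)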