# Source: Hugging Face Space "Kai422kx/das3r" (status: Running) - file size: 17,072 bytes - revision 4f6b78d

import torch
import numpy as np
import torchvision.ops as ops

def matmul2(mat1, mat2):
    '''Return the matrix product of mat1 and mat2 via torch.matmul.'''
    product = torch.matmul(mat1, mat2)
    return product

def matmul3(mat1, mat2, mat3):
    '''Return mat1 @ (mat2 @ mat3); the right-hand pair is multiplied first.'''
    right = torch.matmul(mat2, mat3)
    return torch.matmul(mat1, right)

def eye_3x3(B, device='cuda'):
    '''Return a (B, 3, 3) tensor holding B copies of the 3x3 identity on `device`.'''
    identity = torch.eye(3, device=torch.device(device))
    return identity.unsqueeze(0).repeat(B, 1, 1)

def eye_4x4(B, device='cuda'):
    '''Return a (B, 4, 4) tensor holding B copies of the 4x4 identity on `device`.'''
    identity = torch.eye(4, device=torch.device(device))
    return identity.unsqueeze(0).repeat(B, 1, 1)

def safe_inverse(a):
    '''
    Invert a single 4x4 transform without calling a general matrix inverse.

    The top-left 3x3 block is treated as a rotation (its transpose is used as
    its inverse) and the translation column is replaced by -R^T t.
    All other entries are copied from `a` unchanged.
    '''
    rot_t = a[:3, :3].transpose(0, 1)  # transpose == inverse for a rotation
    trans = a[:3, 3:4]

    result = a.clone()
    result[:3, :3] = rot_t
    result[:3, 3:4] = -torch.matmul(rot_t, trans)
    return result

def safe_inverse_batch(a):
    '''
    Batched counterpart of safe_inverse.

    `a` must be 3-dimensional (the shape unpack below enforces that); each
    a[b] has its top-left 3x3 block transposed and its translation column
    replaced by -R^T t. Remaining entries are copied from `a`.
    '''
    _batch, _rows, _cols = list(a.shape)  # raises unless `a` has exactly 3 dims

    rot_t = a[:, :3, :3].transpose(1, 2)  # per-item rotation inverse
    trans = a[:, :3, 3:4]

    result = a.clone()
    result[:, :3, :3] = rot_t
    result[:, :3, 3:4] = -torch.matmul(rot_t, trans)
    return result


def safe_inverse_single(a):
    '''
    Invert one 4x4 transform by assembling [R^T | -R^T t] on top of a's last row.

    The rotation/translation parts come from split_rt_single (defined outside
    this block); the translation is reshaped to a 3x1 column here.
    '''
    rot, trans = split_rt_single(a)
    trans_col = trans.view(3, 1)
    rot_t = rot.t()
    top_rows = torch.cat([rot_t, -torch.matmul(rot_t, trans_col)], 1)
    # the last row is reused from the input rather than rebuilt as [0, 0, 0, 1]
    last_row = a[3:4, :]
    return torch.cat([top_rows, last_row], 0)

def split_intrinsics(K):
    '''
    Pull (fx, fy, x0, y0) out of a batch of intrinsics matrices.

    K is B x 3 x 3 or B x 4 x 4; each returned value is indexed along the
    batch dimension only.
    '''
    fx, fy = K[:, 0, 0], K[:, 1, 1]
    x0, y0 = K[:, 0, 2], K[:, 1, 2]
    return fx, fy, x0, y0

def apply_pix_T_cam(pix_T_cam, xyz):
    '''
    Project camera-frame points to pixel coordinates (torch version).

    xyz is shaped B x H*W x 3
    returns xy, shaped B x H*W x 2

    Depth is clamped to a minimum of 1e-4 before the division.
    '''
    intrinsics = split_intrinsics(pix_T_cam)

    B, N, C = list(xyz.shape)
    assert(C==3)

    x, y, z = torch.unbind(xyz, dim=-1)

    # one column per batch item so the intrinsics broadcast over the N points
    fx, fy, x0, y0 = [torch.reshape(k, [B, 1]) for k in intrinsics]

    depth = torch.clamp(z, min=1e-4)
    u = (x*fx)/(depth)+x0
    v = (y*fy)/(depth)+y0
    return torch.stack([u, v], dim=-1)

def apply_pix_T_cam_py(pix_T_cam, xyz):
    '''
    Project camera-frame points to pixel coordinates (NumPy version).

    xyz is shaped B x H*W x 3
    returns xy, shaped B x H*W x 2

    Depth is clipped to a minimum of 1e-4 before the division.
    '''
    intrinsics = split_intrinsics(pix_T_cam)

    B, N, C = list(xyz.shape)
    assert(C==3)

    x = xyz[:, :, 0]
    y = xyz[:, :, 1]
    z = xyz[:, :, 2]

    # one column per batch item so the intrinsics broadcast over the N points
    fx, fy, x0, y0 = [np.reshape(k, [B, 1]) for k in intrinsics]

    depth = np.clip(z, 1e-4, None)
    u = (x*fx)/(depth)+x0
    v = (y*fy)/(depth)+y0
    return np.stack([u, v], axis=-1)

def get_camM_T_camXs(origin_T_camXs, ind=0):
    '''
    Re-express every transform relative to the one at index `ind`.

    For each batch item b and each s, the output holds
    inverse(origin_T_camXs[b, ind]) @ origin_T_camXs[b, s].
    The first two dimensions of the input are read as (B, S).
    '''
    B, S = list(origin_T_camXs.shape)[0:2]
    out = torch.zeros_like(origin_T_camXs)
    for b in range(B):
        seq = origin_T_camXs[b]
        ref_T_origin = safe_inverse_single(seq[ind])
        for s in range(S):
            out[b, s] = torch.matmul(ref_T_origin, seq[s])
    return out


def realative_T_py(cam_T1, cam_T2):
    '''
    Return inverse(cam_T1) @ cam_T2 as a NumPy array.

    Both inputs are converted to float32 torch tensors first; the inverse is
    computed with safe_inverse.
    '''
    T1 = torch.tensor(cam_T1, dtype=torch.float32)
    T2 = torch.tensor(cam_T2, dtype=torch.float32)
    T1_inv = safe_inverse(T1)
    return torch.matmul(T1_inv, T2).numpy()

def apply_4x4(RT, xyz):
    '''
    Apply 4x4 transform(s) RT to a batch of 3D points.

    xyz must be 3-dimensional (B x N x 3); a column of ones is appended, the
    points are multiplied by RT, and the first three output coordinates are
    returned as B x N x 3.
    '''
    B, N, _ = list(xyz.shape)  # raises unless xyz has exactly 3 dims
    homog = torch.cat([xyz, torch.ones_like(xyz[:, :, 0:1])], 2)
    # B x 4 x N after the transpose, so RT can left-multiply the points
    moved = torch.matmul(RT, torch.transpose(homog, 1, 2))
    return torch.transpose(moved, 1, 2)[:, :, :3]

def apply_4x4_py(RT, xyz):
    '''
    Apply a 4x4 transform to an N x 3 NumPy point array.

    A column of ones is appended, the points are multiplied by RT, and the
    first three output coordinates are returned as N x 3.
    '''
    homog = np.concatenate([xyz, np.ones_like(xyz[:, 0:1])], 1)
    moved = np.matmul(RT, homog.transpose(1, 0))  # 4 x N
    return moved.transpose(1, 0)[:, :3]

def apply_4x4_py_batch(RT, xyz):
    '''
    Apply 4x4 transform(s) to a B x N x 3 NumPy point array.

    A column of ones is appended, the points are multiplied by RT, and the
    first three output coordinates are returned as B x N x 3.
    '''
    B, N, _ = list(xyz.shape)  # raises unless xyz has exactly 3 dims
    homog = np.concatenate([xyz, np.ones_like(xyz[:, :, 0:1])], 2)
    # B x 4 x N after the transpose, so RT can left-multiply the points
    moved = np.matmul(RT, homog.transpose(0, 2, 1))
    return moved.transpose(0, 2, 1)[:, :, :3]

def apply_3x3(RT, xy):
    '''
    Apply 3x3 transform(s) RT to a batch of 2D points.

    xy must be 3-dimensional (B x N x 2); a column of ones is appended, the
    points are multiplied by RT, and the first two output coordinates are
    returned as B x N x 2 (no division by the third coordinate is performed).
    '''
    B, N, _ = list(xy.shape)  # raises unless xy has exactly 3 dims
    homog = torch.cat([xy, torch.ones_like(xy[:, :, 0:1])], 2)
    # B x 3 x N after the transpose, so RT can left-multiply the points
    moved = torch.matmul(RT, torch.transpose(homog, 1, 2))
    return torch.transpose(moved, 1, 2)[:, :, :2]

def generate_polygon(ctr_x, ctr_y, avg_r, irregularity, spikiness, num_verts):
    '''
    Start with the center of the polygon at ctr_x, ctr_y,
    then create the polygon by sampling points on a circle around the center.
    Random noise is added by varying the angular spacing between sequential points,
    and by varying the radial distance of each point from the centre.

    Params:
        ctr_x, ctr_y - coordinates of the "centre" of the polygon
        avg_r - in px, the average radius of this polygon, this roughly controls how large the polygon is, really only useful for order of magnitude.
        irregularity - [0,1] indicating how much variance there is in the angular spacing of vertices. [0,1] will map to [0, 2pi/numberOfVerts]
        spikiness - [0,1] indicating how much variance there is in each vertex from the circle of radius avg_r. [0,1] will map to [0, avg_r]
        num_verts - number of vertices to generate

    Returns:
        np.array [num_verts, 2] of integer (x, y) coordinates (truncated by
        astype(int)), ordered by increasing angle (counter-clockwise when the
        y axis points up).

    Uses the global np.random state; draws happen in the order
    angle steps -> start angle -> radii.
    '''
    # spikiness becomes the standard deviation of the radius
    spikiness = np.clip(spikiness, 0, 1) * avg_r

    # bounds of the per-vertex angular step
    irregularity = np.clip(irregularity, 0, 1) * 2 * np.pi / num_verts
    lower = (2*np.pi / num_verts) - irregularity
    upper = (2*np.pi / num_verts) + irregularity

    # random steps, rescaled so that they sum to a full turn
    angle_steps = np.random.uniform(lower, upper, num_verts)
    angle_steps *= (2 * np.pi) / angle_steps.sum()

    # random start angle and per-vertex radii (clipped to [0, 2*avg_r])
    start_angle = np.random.uniform(0, 2*np.pi)
    radii = np.clip(np.random.normal(avg_r, spikiness, num_verts), 0, 2 * avg_r)

    # Vertex i sits at start_angle + sum(angle_steps[:i]); the running sum is
    # accumulated in the same order as a sequential loop would do it.
    angles = np.cumsum(np.concatenate([[start_angle], angle_steps[:-1]]))

    xs = ctr_x + radii * np.cos(angles)
    ys = ctr_y + radii * np.sin(angles)
    return np.stack([xs, ys], axis=1).astype(int)


def get_random_affine_2d(B, rot_min=-5.0, rot_max=5.0, tx_min=-0.1, tx_max=0.1, ty_min=-0.1, ty_max=0.1, sx_min=-0.05, sx_max=0.05, sy_min=-0.05, sy_max=0.05, shx_min=-0.05, shx_max=0.05, shy_min=-0.05, shy_max=0.05):
    '''
    Sample B random 2D affine transforms as 3x3 homogeneous matrices.

    Params:
        rot_min: rotation amount min, in degrees
        rot_max: rotation amount max, in degrees

        tx_min: translation x min
        tx_max: translation x max

        ty_min: translation y min
        ty_max: translation y max

        sx_min: scaling x min (offset added to 1)
        sx_max: scaling x max (offset added to 1)

        sy_min: scaling y min (offset added to 1)
        sy_max: scaling y max (offset added to 1)

        shx_min: shear x min
        shx_max: shear x max

        shy_min: shear y min
        shy_max: shear y max

    Returns:
        transformation matrix: (B, 3, 3), composed as
        (rotation @ translation) @ (scaling @ shear)

    Notes:
        * If rot_min == rot_max, that fixed angle (in degrees) is applied to
          every item.
        * For translation / scale / shear, a component is only randomised when
          its range passes the check below; otherwise it keeps its identity
          value (0 translation, unit scale, 0 shear).
        * Uses the global np.random state.
    '''
    # rotation: both branches yield degrees, converted to radians once below
    if rot_max - rot_min != 0:
        rot_deg = np.random.uniform(low=rot_min, high=rot_max, size=B)
    else:
        rot_deg = rot_min
    rot_amount = np.pi/180.0*rot_deg
    rotation = np.zeros((B, 3, 3)) # B, 3, 3
    rotation[:, 2, 2] = 1
    rotation[:, 0, 0] = np.cos(rot_amount)
    rotation[:, 0, 1] = -np.sin(rot_amount)
    rotation[:, 1, 0] = np.sin(rot_amount)
    rotation[:, 1, 1] = np.cos(rot_amount)

    # translation
    translation = np.zeros((B, 3, 3)) # B, 3, 3
    translation[:, [0,1,2], [0,1,2]] = 1
    if (tx_max - tx_min) > 0:
        translation[:, 0, 2] = np.random.uniform(low=tx_min, high=tx_max, size=B)
    # NOTE(review): ty is gated with `!= 0` while every other component uses
    # `> 0`; kept as-is to preserve behaviour for reversed ranges.
    if ty_max - ty_min != 0:
        translation[:, 1, 2] = np.random.uniform(low=ty_min, high=ty_max, size=B)

    # scaling
    scaling = np.zeros((B, 3, 3)) # B, 3, 3
    scaling[:, [0,1,2], [0,1,2]] = 1
    if (sx_max - sx_min) > 0:
        scaling[:, 0, 0] = 1 + np.random.uniform(low=sx_min, high=sx_max, size=B)
    if (sy_max - sy_min) > 0:
        scaling[:, 1, 1] = 1 + np.random.uniform(low=sy_min, high=sy_max, size=B)

    # shear
    shear = np.zeros((B, 3, 3)) # B, 3, 3
    shear[:, [0,1,2], [0,1,2]] = 1
    if (shx_max - shx_min) > 0:
        shear[:, 0, 1] = np.random.uniform(low=shx_min, high=shx_max, size=B)
    if (shy_max - shy_min) > 0:
        shear[:, 1, 0] = np.random.uniform(low=shy_min, high=shy_max, size=B)

    # compose all those (batched matrix products)
    rt = np.einsum("ijk,ikl->ijl", rotation, translation)
    ss = np.einsum("ijk,ikl->ijl", scaling, shear)
    trans = np.einsum("ijk,ikl->ijl", rt, ss)

    return trans

def get_centroid_from_box2d(box2d):
    '''
    Return the box centres as (y, x).

    Columns of box2d are read as ymin, xmin, ymax, xmax.
    '''
    ymin, xmin, ymax, xmax = (box2d[:, col] for col in range(4))
    center_x = (xmin+xmax)/2.0
    center_y = (ymin+ymax)/2.0
    return center_y, center_x

def normalize_boxlist2d(boxlist2d, H, W):
    '''
    Divide y coordinates by H and x coordinates by W.

    The last (third) dimension of boxlist2d is unpacked as
    ymin, xmin, ymax, xmax; a new tensor with the same layout is returned.
    '''
    ymin, xmin, ymax, xmax = torch.unbind(boxlist2d.clone(), dim=2)
    height = float(H)
    width = float(W)
    scaled = [ymin / height, xmin / width, ymax / height, xmax / width]
    return torch.stack(scaled, dim=2)

def unnormalize_boxlist2d(boxlist2d, H, W):
    '''
    Multiply y coordinates by H and x coordinates by W.

    The last (third) dimension of boxlist2d is unpacked as
    ymin, xmin, ymax, xmax; a new tensor with the same layout is returned.
    '''
    ymin, xmin, ymax, xmax = torch.unbind(boxlist2d.clone(), dim=2)
    height = float(H)
    width = float(W)
    scaled = [ymin * height, xmin * width, ymax * height, xmax * width]
    return torch.stack(scaled, dim=2)

def unnormalize_box2d(box2d, H, W):
    '''Single-box wrapper: add a list dim, call unnormalize_boxlist2d, drop it again.'''
    as_boxlist = box2d.unsqueeze(1)
    scaled = unnormalize_boxlist2d(as_boxlist, H, W)
    return scaled.squeeze(1)

def normalize_box2d(box2d, H, W):
    '''Single-box wrapper: add a list dim, call normalize_boxlist2d, drop it again.'''
    as_boxlist = box2d.unsqueeze(1)
    scaled = normalize_boxlist2d(as_boxlist, H, W)
    return scaled.squeeze(1)

def get_size_from_box2d(box2d):
    '''
    Return the box extents as (height, width).

    Columns of box2d are read as ymin, xmin, ymax, xmax.
    '''
    ymin, xmin, ymax, xmax = (box2d[:, col] for col in range(4))
    return ymax-ymin, xmax-xmin

def crop_and_resize(im, boxlist, PH, PW, boxlist_is_normalized=False):
    '''
    Crop every box out of its image and resize each crop to PH x PW.

    im is B,C,H,W; boxlist is B,N,4 with (ymin, xmin, ymax, xmax) per box.
    When boxlist_is_normalized is True the boxes are first scaled by H and W.
    output is B,N,C,PH,PW
    '''
    B, C, H, W = im.shape
    B2, N, D = boxlist.shape
    assert(B==B2)
    assert(D==4)

    if boxlist_is_normalized:
        boxes = unnormalize_boxlist2d(boxlist, H, W)
    else:
        boxes = boxlist

    # roi_align wants (x1, y1, x2, y2) in pixel units, so swap the column order
    ymin, xmin, ymax, xmax = boxes.unbind(2)
    boxes_xyxy = torch.stack([xmin, ymin, xmax, ymax], dim=2)

    # one roi_align call per image, each given that image's N boxes
    per_image = [
        ops.roi_align(im[b:b+1], [boxes_xyxy[b]], output_size=(PH, PW))
        for b in range(B)
    ]
    return torch.stack(per_image, dim=0)


# def get_boxlist_from_centroid_and_size(cy, cx, h, w, clip=True):
#     # cy,cx are both B,N
#     ymin = cy - h/2
#     ymax = cy + h/2
#     xmin = cx - w/2
#     xmax = cx + w/2

#     box = torch.stack([ymin, xmin, ymax, xmax], dim=-1)
#     if clip:
#         box = torch.clamp(box, 0, 1)
#     return box


def get_boxlist_from_centroid_and_size(cy, cx, h, w):#, clip=False):
    '''
    Build (ymin, xmin, ymax, xmax) boxes from centres and sizes.

    cy,cx are the same shape; the four corners are stacked along a new last
    dimension. No clipping is applied.
    '''
    half_h = h/2
    half_w = w/2
    corners = [cy - half_h, cx - half_w, cy + half_h, cx + half_w]
    return torch.stack(corners, dim=-1)


def get_box2d_from_mask(mask, normalize=False):
    '''
    Compute the tight bounding box of the positive pixels of each mask.

    mask is B, 1, H, W; a pixel counts as "on" when its value is > 0.

    Returns a float32 tensor shaped B, 4 holding (ymin, xmin, ymax, xmax) in
    pixel indices, or divided by (H, W) via normalize_boxlist2d when
    normalize=True.

    Raises:
        RuntimeError: (from torch) if some mask has no positive pixel, since
        min/max of an empty tensor is undefined.
    '''
    B, C, H, W = mask.shape
    assert(C==1)

    box = torch.zeros((B, 4), dtype=torch.float32, device=mask.device)
    for b in range(B):
        # Row/column indices of the "on" pixels; these are the pixel
        # coordinates directly, so no external grid helper is required.
        ys, xs = torch.nonzero(mask[b, 0] > 0, as_tuple=True)
        corners = torch.stack([ys.min(), xs.min(), ys.max(), xs.max()], dim=0)
        box[b] = corners.to(box.dtype)
    if normalize:
        box = normalize_boxlist2d(box.unsqueeze(1), H, W).squeeze(1)
    return box

def convert_box2d_to_intrinsics(box2d, pix_T_cam, H, W, use_image_aspect_ratio=True, mult_padding=1.0):
    '''
    Derive intrinsics for a camera that is "zoomed in" on a 2D box.

    box2d is B x 4, with ymin, xmin, ymax, xmax in normalized coords
    H, W is the original size of the image
    mult_padding is relative to object size in pixels
    (the rendered image is assumed to be the same size as the original, H x W)

    Steps:
      1. optionally grow/shrink the box around its centre by mult_padding;
      2. optionally widen or heighten it so its pixel aspect ratio equals H/W;
      3. shift the principal point by the box's top-left corner;
      4. scale the intrinsics by 1/box_w and 1/box_h.

    Returns (pix_T_cam, box2d): the new intrinsics and the adjusted box.

    NOTE(review): pack_intrinsics and scale_intrinsics are not defined in the
    visible part of this file; their behaviour is assumed from their names.
    '''
    # Sizes for the sanity check below. The branches may overwrite them; the
    # values checked are in pixels if the aspect-ratio branch ran, normalized
    # otherwise.
    h, w = get_size_from_box2d(box2d)

    if not mult_padding==1.0:
        y, x = get_centroid_from_box2d(box2d)
        box2d = get_boxlist_from_centroid_and_size(
            y, x, h*mult_padding, w*mult_padding)

    if use_image_aspect_ratio:
        h, w = get_size_from_box2d(box2d)
        y, x = get_centroid_from_box2d(box2d)

        # h,w are normalized here; convert to pixels to see the real ratio
        h = h*float(H)
        w = w*float(W)
        im_ratio = H/float(W)

        # Per-box choice (works for any batch size): a box that is relatively
        # taller than the image gets wider, otherwise it gets taller.
        box_is_taller = (h/w) >= im_ratio
        new_w = torch.where(box_is_taller, h/im_ratio, w)
        new_h = torch.where(box_is_taller, h, w*im_ratio)
        h, w = new_h, new_w

        box2d = get_boxlist_from_centroid_and_size(
            y, x, h/float(H), w/float(W))

    assert bool((h > 1e-4).all())
    assert bool((w > 1e-4).all())

    ymin, xmin, ymax, xmax = torch.unbind(box2d, dim=1)

    fx, fy, x0, y0 = split_intrinsics(pix_T_cam)

    # the topleft of the new image will now have a different offset from the center of projection
    new_x0 = x0 - xmin*W
    new_y0 = y0 - ymin*H

    pix_T_cam = pack_intrinsics(fx, fy, new_x0, new_y0)
    # this alone gives an image in original resolution,
    # with its topleft at the box corner

    box_h, box_w = get_size_from_box2d(box2d)
    # these are normalized, and shaped B. (e.g., [0.4], [0.3])

    # scale the image by the inverse of the box size,
    # since we are zooming into this area
    sy = 1./box_h
    sx = 1./box_w

    pix_T_cam = scale_intrinsics(pix_T_cam, sx, sy)
    return pix_T_cam, box2d

def pixels2camera(x,y,z,fx,fy,x0,y0):
    '''
    Unproject pixel locations with depth into camera-frame 3D points.

    x and y are locations in pixel coordinates, z is a depth in meters
    they can be images or pointclouds (everything is flattened to B x N)
    fx, fy, x0, y0 are camera intrinsics
    returns xyz, sized B x N x 3
    '''
    B = x.shape[0]

    # intrinsics become B x 1 columns so they broadcast over the N points
    fx, fy, x0, y0 = [torch.reshape(k, [B,1]) for k in (fx, fy, x0, y0)]
    x, y, z = [torch.reshape(v, [B,-1]) for v in (x, y, z)]

    # unproject
    cam_x = (z/fx)*(x-x0)
    cam_y = (z/fy)*(y-y0)

    return torch.stack([cam_x, cam_y, z], dim=2)

def camera2pixels(xyz, pix_T_cam):
    '''
    Project camera-frame points to pixel coordinates.

    xyz is shaped B x H*W x 3
    returns xy, shaped B x H*W x 2

    Depth is clamped to a minimum of 1e-4 before the division.
    '''
    intrinsics = split_intrinsics(pix_T_cam)
    x, y, z = torch.unbind(xyz, dim=-1)
    B = list(z.shape)[0]

    # intrinsics become B x 1 columns so they broadcast over the points
    fx, fy, x0, y0 = [torch.reshape(k, [B,1]) for k in intrinsics]
    x, y, z = [torch.reshape(v, [B,-1]) for v in (x, y, z)]

    depth = torch.clamp(z, min=1e-4)
    u = (x*fx)/depth + x0
    v = (y*fy)/depth + y0
    return torch.stack([u, v], dim=-1)

def depth2pointcloud(z, pix_T_cam):
    '''
    Unproject a depth map into a camera-frame point cloud.

    z is B x C x H x W; it is reshaped to B x H x W, so C must be 1.
    pix_T_cam holds the intrinsics read by split_intrinsics.
    Returns xyz from pixels2camera (B x H*W x 3), one point per pixel in
    row-major order.
    '''
    B, C, H, W = list(z.shape)
    device = z.device

    # Pixel-index grids (y = row, x = column), each viewed as B x H x W. They
    # are built inline so this function does not rely on an external
    # `utils.basic` module.
    rows = torch.arange(H, dtype=torch.float32, device=device)
    cols = torch.arange(W, dtype=torch.float32, device=device)
    y = rows.reshape(1, H, 1).expand(B, H, W)
    x = cols.reshape(1, 1, W).expand(B, H, W)

    z = torch.reshape(z, [B, H, W])
    fx, fy, x0, y0 = split_intrinsics(pix_T_cam)
    xyz = pixels2camera(x, y, z, fx, fy, x0, y0)
    return xyz