File size: 6,031 Bytes
ff66cf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
import numpy as np
from cliport.utils import utils
from cliport.agents.transporter import OriginalTransporterAgent
from cliport.models.core.attention import Attention
from cliport.models.core.attention_image_goal import AttentionImageGoal
from cliport.models.core.transport_image_goal import TransportImageGoal
class ImageGoalTransporterAgent(OriginalTransporterAgent):
    """Transporter agent whose pick and place models also receive a goal image.

    Both the attention (pick) model and the transport (place) model are called
    with the current observation image and a goal image.
    """

    def __init__(self, name, cfg, train_ds, test_ds):
        super().__init__(name, cfg, train_ds, test_ds)

    def _build_model(self):
        """Create the goal-conditioned attention and transport models.

        Both models use the ``'plain_resnet'`` stream. The attention model is
        built with a single rotation; the transport model uses
        ``self.n_rotations`` and ``self.crop_size``.
        """
        stream_fcn = 'plain_resnet'
        self.attention = AttentionImageGoal(
            stream_fcn=(stream_fcn, None),
            in_shape=self.in_shape,
            n_rotations=1,
            preprocess=utils.preprocess,
            cfg=self.cfg,
            device=self.device_type,
        )
        self.transport = TransportImageGoal(
            stream_fcn=(stream_fcn, None),
            in_shape=self.in_shape,
            n_rotations=self.n_rotations,
            crop_size=self.crop_size,
            preprocess=utils.preprocess,
            cfg=self.cfg,
            device=self.device_type,
        )

    def attn_forward(self, inp, softmax=True):
        """Run the attention model on ``inp['inp_img']`` and ``inp['goal_img']``.

        Args:
            inp: Mapping with the keys ``'inp_img'`` and ``'goal_img'``.
            softmax: Forwarded unchanged to ``self.attention.forward``.

        Returns:
            Whatever ``self.attention.forward`` returns.
        """
        return self.attention.forward(
            inp['inp_img'], inp['goal_img'], softmax=softmax
        )

    def attn_training_step(self, frame, goal, backprop=True, compute_err=False):
        """Evaluate the attention criterion for one (frame, goal) pair.

        Args:
            frame: Mapping providing ``'img'``, ``'p0'`` and ``'p0_theta'``.
            goal: Mapping providing ``'img'`` (the goal image).
            backprop: Forwarded to ``self.attn_criterion``.
            compute_err: Forwarded to ``self.attn_criterion``.

        Returns:
            The result of ``self.attn_criterion``; callers in this class
            unpack it as ``(loss, err)``.
        """
        p0, p0_theta = frame['p0'], frame['p0_theta']
        inp = {'inp_img': frame['img'], 'goal_img': goal['img']}
        # The criterion receives the un-normalised output (softmax disabled).
        out = self.attn_forward(inp, softmax=False)
        return self.attn_criterion(backprop, compute_err, inp, out, p0, p0_theta)

    def trans_forward(self, inp, softmax=True):
        """Run the transport model on the observation, goal image and pick point.

        Args:
            inp: Mapping with the keys ``'inp_img'``, ``'goal_img'`` and ``'p0'``.
            softmax: Forwarded unchanged to ``self.transport.forward``.

        Returns:
            Whatever ``self.transport.forward`` returns.
        """
        return self.transport.forward(
            inp['inp_img'], inp['goal_img'], inp['p0'], softmax=softmax
        )

    def transport_training_step(self, frame, goal, backprop=True, compute_err=False):
        """Evaluate the transport criterion for one (frame, goal) pair.

        Args:
            frame: Mapping providing ``'img'``, ``'p0'``, ``'p1'`` and
                ``'p1_theta'``.
            goal: Mapping providing ``'img'`` (the goal image).
            backprop: Forwarded to ``self.transport_criterion``.
            compute_err: Forwarded to ``self.transport_criterion``.

        Returns:
            ``(loss, err)``.
        """
        p0 = frame['p0']
        p1, p1_theta = frame['p1'], frame['p1_theta']
        inp = {'inp_img': frame['img'], 'goal_img': goal['img'], 'p0': p0}
        out = self.trans_forward(inp, softmax=False)
        # The criterion's result is unpacked as (err, loss) here, the reverse
        # of the order this method hands back to its callers.
        err, loss = self.transport_criterion(
            backprop, compute_err, inp, out, p0, p1, p1_theta
        )
        return loss, err

    def _place_training_step(self, frame, goal, **kwargs):
        """Run the second-stage step that matches the type of ``self.transport``.

        If ``self.transport`` is an ``Attention`` instance the attention step
        is used, otherwise the transport step. Keyword arguments are passed
        through untouched so each step keeps its own defaults.
        """
        if isinstance(self.transport, Attention):
            return self.attn_training_step(frame, goal, **kwargs)
        return self.transport_training_step(frame, goal, **kwargs)

    def training_step(self, batch, batch_idx):
        """Compute and log the combined training loss for one batch.

        Args:
            batch: A ``(frame, goal)`` pair.
            batch_idx: Unused.

        Returns:
            ``dict(loss=total_loss)`` where ``total_loss`` is the sum of the
            attention loss and the second-stage loss.
        """
        self.attention.train()
        self.transport.train()

        frame, goal = batch

        # Get training losses.
        step = self.total_steps + 1
        loss0, _ = self.attn_training_step(frame, goal)
        loss1, _ = self._place_training_step(frame, goal)
        total_loss = loss0 + loss1

        self.log('tr/attn/loss', loss0)
        self.log('tr/trans/loss', loss1)
        self.log('tr/loss', total_loss)
        self.total_steps = step

        self.trainer.train_loop.running_loss.append(total_loss)

        self.check_save_iteration()

        return dict(
            loss=total_loss,
        )

    def validation_step(self, batch, batch_idx):
        """Average the validation losses over ``self.val_repeats`` passes.

        Args:
            batch: A ``(frame, goal)`` pair.
            batch_idx: Unused.

        Returns:
            Dict with the total and per-model mean losses, plus the ``'dist'``
            and ``'theta'`` error entries from the last repeat.
        """
        self.attention.eval()
        self.transport.eval()

        # The batch does not change between repeats, so unpack it once.
        frame, goal = batch

        loss0, loss1 = 0, 0
        # NOTE(review): assumes self.val_repeats >= 1; with zero repeats
        # err0/err1 are never bound and the divisions below fail.
        for _ in range(self.val_repeats):
            l0, err0 = self.attn_training_step(
                frame, goal, backprop=False, compute_err=True
            )
            loss0 += l0
            l1, err1 = self._place_training_step(
                frame, goal, backprop=False, compute_err=True
            )
            loss1 += l1
        loss0 /= self.val_repeats
        loss1 /= self.val_repeats
        val_total_loss = loss0 + loss1

        self.trainer.evaluation_loop.trainer.train_loop.running_loss.append(val_total_loss)

        # Only the errors from the final repeat are reported.
        return dict(
            val_loss=val_total_loss,
            val_loss0=loss0,
            val_loss1=loss1,
            val_attn_dist_err=err0['dist'],
            val_attn_theta_err=err0['theta'],
            val_trans_dist_err=err1['dist'],
            val_trans_theta_err=err1['theta'],
        )

    def act(self, obs, info=None, goal=None):  # pylint: disable=unused-argument
        """Run inference and return best action given visual observations.

        Args:
            obs: Observation passed to ``self.test_ds.get_image``.
            info: Unused.
            goal: Indexable whose first element is the goal observation; it is
                passed to ``self.test_ds.get_image``. Required even though
                the signature defaults it to ``None``.

        Returns:
            Dict with ``'pose0'`` and ``'pose1'`` (each a ``(xyz, xyzw)`` pair
            of arrays) and the ``'pick'`` and ``'place'`` pixel coordinates.

        Raises:
            ValueError: If ``goal`` is ``None``.
        """
        if goal is None:
            # Without this guard, `goal[0]` below fails with an opaque
            # "'NoneType' object is not subscriptable" TypeError.
            raise ValueError(
                'ImageGoalTransporterAgent.act() requires a `goal` whose first '
                'element is the goal observation.'
            )

        # Get heightmap from RGB-D images.
        img = self.test_ds.get_image(obs)
        goal_img = self.test_ds.get_image(goal[0])

        # Attention model forward pass.
        pick_conf = self.attention.forward(img, goal_img)
        pick_conf = pick_conf.detach().cpu().numpy()
        pick_idx = np.unravel_index(np.argmax(pick_conf), shape=pick_conf.shape)
        p0_pix = pick_idx[:2]
        # The third index is scaled to an angle over a full turn.
        p0_theta = pick_idx[2] * (2 * np.pi / pick_conf.shape[2])

        # Transport model forward pass.
        place_conf = self.transport.forward(img, goal_img, p0_pix)
        # Move the leading axis last so the angle index sits at position 2,
        # as it does for pick_conf.
        place_conf = place_conf.permute(1, 2, 0)
        place_conf = place_conf.detach().cpu().numpy()
        place_idx = np.unravel_index(np.argmax(place_conf), shape=place_conf.shape)
        p1_pix = place_idx[:2]
        p1_theta = place_idx[2] * (2 * np.pi / place_conf.shape[2])

        # Pixels to end effector poses.
        hmap = img[:, :, 3]  # presumably the height channel -- TODO confirm
        p0_xyz = utils.pix_to_xyz(p0_pix, hmap, self.bounds, self.pix_size)
        p1_xyz = utils.pix_to_xyz(p1_pix, hmap, self.bounds, self.pix_size)
        p0_xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
        p1_xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, -p1_theta))

        return {
            'pose0': (np.asarray(p0_xyz), np.asarray(p0_xyzw)),
            'pose1': (np.asarray(p1_xyz), np.asarray(p1_xyzw)),
            'pick': p0_pix,
            'place': p1_pix,
        }
|