|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Defines the various loss functions in use by the PTN model.""" |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import tensorflow as tf |
|
|
|
slim = tf.contrib.slim |
|
|
|
|
|
def add_rotator_image_loss(inputs, outputs, step_size, weight_scale):
    """Builds the image reconstruction loss for the deep rotator model.

    For every recurrent step k in 1..step_size the loss compares
    `inputs['images_k']` against `outputs['images_k']` with `tf.nn.l2_loss`.
    The per-step terms are summed and divided by `step_size * batch_size`,
    where the batch size is the leading dimension of `inputs['images_0']`.
    The averaged (not yet reweighted) value is recorded as the
    'image_loss' scalar summary under the 'losses' prefix.

    Args:
      inputs: Input dictionary to the model; read at keys `images_0` and
        `images_k` for k = 1..step_size (ground-truth images).
      outputs: Output dictionary returned by the model; read at keys
        `images_k` for k = 1..step_size (predicted images).
      step_size: Number of recurrent steps (repeated out-of-plane
        rotations) in the deep rotator network (int).
      weight_scale: Reweighting factor applied to the averaged loss (float).

    Returns:
      A scalar `Tensor`: the averaged L2 image loss multiplied by
      `weight_scale`.
    """
    batch_size = tf.shape(inputs['images_0'])[0]
    step_keys = ['images_%d' % k for k in range(1, step_size + 1)]
    # sum() starts from the int 0 and adds the terms left to right, which is
    # exactly what a manual `loss = 0; loss += term` accumulator does.
    total = sum(
        tf.nn.l2_loss(inputs[key] - outputs[key]) for key in step_keys)

    averaged = total / tf.to_float(step_size * batch_size)
    # The summary tracks the loss before the reweighting factor is applied.
    slim.summaries.add_scalar_summary(
        averaged, 'image_loss', prefix='losses')
    return averaged * weight_scale
|
|
|
|
|
def add_rotator_mask_loss(inputs, outputs, step_size, weight_scale):
    """Builds the mask (silhouette) loss for the deep rotator model.

    For every recurrent step k in 1..step_size the loss compares
    `inputs['masks_k']` against `outputs['masks_k']` with `tf.nn.l2_loss`.
    The per-step terms are summed and divided by `step_size * batch_size`;
    the batch size is read from the leading dimension of
    `inputs['images_0']`. The averaged (not yet reweighted) value is
    recorded as the 'mask_loss' scalar summary under the 'losses' prefix.

    Args:
      inputs: Input dictionary to the model; read at keys `images_0` and
        `masks_k` for k = 1..step_size (ground-truth object silhouettes).
      outputs: Output dictionary returned by the model; read at keys
        `masks_k` for k = 1..step_size (predicted masks).
      step_size: Number of recurrent steps (repeated out-of-plane
        rotations) in the deep rotator network (int).
      weight_scale: Reweighting factor applied to the averaged loss (float).

    Returns:
      A `Tensor`: the averaged L2 mask loss multiplied by `weight_scale`.
    """
    batch_size = tf.shape(inputs['images_0'])[0]

    accumulated = 0
    for step in range(1, step_size + 1):
        key = 'masks_%d' % step
        residual = inputs[key] - outputs[key]
        accumulated = accumulated + tf.nn.l2_loss(residual)

    averaged = accumulated / tf.to_float(step_size * batch_size)
    # The summary tracks the loss before the reweighting factor is applied.
    slim.summaries.add_scalar_summary(
        averaged, 'mask_loss', prefix='losses')
    return averaged * weight_scale
|
|
|
|
|
def add_volume_proj_loss(inputs, outputs, num_views, weight_scale):
    """Builds the projection loss for the voxel generation model.

    For every view k in 1..num_views the loss compares
    `outputs['masks_k']` against `outputs['projs_k']` with `tf.nn.l2_loss`.
    Both terms are read from `outputs`; `inputs` is only used to obtain the
    batch size (leading dimension of `inputs['images_1']`). The summed loss
    is divided by `num_views * batch_size`, and that averaged (not yet
    reweighted) value is recorded as the 'proj_loss' scalar summary under
    the 'losses' prefix.

    Args:
      inputs: Input dictionary to the model; read at key `images_1`.
      outputs: Output dictionary returned by the model; read at keys
        `masks_k` and `projs_k` for k = 1..num_views.
      num_views: Total number of viewpoints for each object (int).
      weight_scale: Reweighting factor applied to the averaged loss (float).

    Returns:
      A `Tensor`: the averaged L2 loss between the masks (object
      silhouettes) and the projections, multiplied by `weight_scale`.
    """
    batch_size = tf.shape(inputs['images_1'])[0]
    # View keys are 1-based, so iterate 1..num_views directly.
    view_ids = range(1, num_views + 1)
    total = sum(
        tf.nn.l2_loss(outputs['masks_%d' % view] - outputs['projs_%d' % view])
        for view in view_ids)

    averaged = total / tf.to_float(num_views * batch_size)
    # The summary tracks the loss before the reweighting factor is applied.
    slim.summaries.add_scalar_summary(
        averaged, 'proj_loss', prefix='losses')
    return averaged * weight_scale
|
|
|
|
|
def add_volume_loss(inputs, outputs, num_views, weight_scale):
    """Builds the volume loss for the voxel generation model.

    For every view k in 1..num_views the loss compares the single
    ground-truth volume `inputs['voxels']` against the prediction
    `outputs['voxels_k']` with `tf.nn.l2_loss`. The per-view terms are
    summed and divided by `num_views * batch_size`, where the batch size is
    the leading dimension of `inputs['images_1']`. The averaged (not yet
    reweighted) value is recorded as the 'vol_loss' scalar summary under
    the 'losses' prefix.

    Args:
      inputs: Input dictionary to the model; read at keys `images_1` and
        `voxels`.
      outputs: Output dictionary returned by the model; read at keys
        `voxels_k` for k = 1..num_views.
      num_views: Total number of viewpoints for each object (int).
      weight_scale: Reweighting factor applied to the averaged loss.

    Returns:
      A `Tensor`: the averaged L2 loss between the ground-truth volume and
      the predicted volumes, multiplied by `weight_scale`.
    """
    batch_size = tf.shape(inputs['images_1'])[0]

    running_total = 0
    # Prediction keys are 1-based, so iterate 1..num_views directly.
    for view in range(1, num_views + 1):
        difference = inputs['voxels'] - outputs['voxels_%d' % view]
        running_total = running_total + tf.nn.l2_loss(difference)

    averaged = running_total / tf.to_float(num_views * batch_size)
    # The summary tracks the loss before the reweighting factor is applied.
    slim.summaries.add_scalar_summary(
        averaged, 'vol_loss', prefix='losses')
    return averaged * weight_scale
|
|
|
|
|
def regularization_loss(scopes, params):
    """Computes the weight decay as regularization during training.

    Sums `tf.nn.l2_loss` over every model variable in the given scopes that
    is trainable and whose name contains 'weights'. The unscaled sum is
    recorded as the 'reg_loss' scalar summary under the 'losses' prefix,
    and the value returned is that sum multiplied by `params.weight_decay`.

    Args:
      scopes: A list of different components of the model such as
        'encoder', 'decoder' and 'projector'. Each entry is passed to
        `tf.contrib.framework.get_model_variables` as the scope.
      params: Parameters of the model; only `params.weight_decay` is read.

    Returns:
      Regularization loss (tf.float32). No variables are visited when
      `params.weight_decay` is not positive, and scopes without matching
      weight variables contribute nothing.
    """
    reg_loss = tf.zeros(dtype=tf.float32, shape=[])
    if params.weight_decay > 0:
        # Query the trainable collection once rather than once per variable.
        trainable_vars = tf.trainable_variables()
        for scope in scopes:
            # Materialize a real list: on Python 3 `filter` returns a lazy
            # iterator that is always truthy, so an emptiness check on it
            # never fires and tf.add_n would be handed an empty list (which
            # raises ValueError).
            scope_weights = [
                var for var in tf.contrib.framework.get_model_variables(scope)
                if var in trainable_vars and 'weights' in var.name
            ]
            if scope_weights:
                reg_loss += tf.add_n(
                    [tf.nn.l2_loss(var) for var in scope_weights])

    # The summary tracks the raw L2 sum, before the decay factor is applied.
    slim.summaries.add_scalar_summary(
        reg_loss, 'reg_loss', prefix='losses')
    reg_loss *= params.weight_decay
    return reg_loss
|
|