|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Contains utility functions for the global objectives library.""" |
|
|
|
|
|
import tensorflow as tf |
|
|
|
|
|
def weighted_sigmoid_cross_entropy_with_logits(labels,
                                               logits,
                                               positive_weights=1.0,
                                               negative_weights=1.0,
                                               name=None):
  """Computes sigmoid cross entropy with separate positive/negative weights.

  Suitable for multilabel classification tasks in which classes are
  independent and not mutually exclusive — for instance, a picture that
  contains both an elephant and a dog. The class weights multiply the two
  kinds of error separately. Writing `x = logits`, `z = labels`,
  `c = positive_weights` and `d = negative_weights`, the weighted logistic
  loss is

  ```
  c * z * -log(sigmoid(x)) + d * (1 - z) * -log(1 - sigmoid(x))
  = d * (x - z * x) + (d + (c - d) * z) * log(1 + exp(-x))
  ```

  To ensure stability and avoid overflow, the softplus factor is evaluated
  with the identity

  ```
  log(1 + exp(-x)) = max(0, -x) + log(1 + exp(-abs(x)))
  ```

  so no large positive argument is ever exponentiated.

  Note that the loss is NOT an upper bound on the 0-1 loss, unless it is
  divided by log(2).

  Args:
    labels: A `Tensor` of type `float32` or `float64`. `labels` can be a 2D
      tensor with shape [batch_size, num_labels] or a 3D tensor with shape
      [batch_size, num_labels, K].
    logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
      shape [batch_size, num_labels, K], the loss is computed separately on
      each slice [:, :, k] of `logits`.
    positive_weights: A `Tensor` that holds positive weights and has the
      following semantics according to its shape:
        scalar - A global positive weight.
        1D tensor - must be of size K, a weight for each 'attempt'
        2D tensor - of size [num_labels, K'] where K' is either K or 1.
      The `positive_weights` will be expanded to the left to match the
      dimensions of logits and labels.
    negative_weights: A `Tensor` that holds positive weight and has the
      semantics identical to positive_weights.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    weighted logistic losses.
  """
  with tf.name_scope(
      name,
      'weighted_logistic_loss',
      [logits, labels, positive_weights, negative_weights]) as name:
    labels, logits, positive_weights, negative_weights = prepare_loss_args(
        labels, logits, positive_weights, negative_weights)

    # Stable softplus: log(1 + exp(-x)) = max(0, -x) + log(1 + exp(-|x|)).
    stable_softplus = (tf.maximum(-logits, 0.0) +
                       tf.log(1.0 + tf.exp(-tf.abs(logits))))
    # Interpolates between the negative and positive class weight as the
    # label moves from 0 to 1: d + (c - d) * z.
    label_dependent_weight = (
        negative_weights + (positive_weights - negative_weights) * labels)
    linear_term = negative_weights * (logits - labels * logits)
    return linear_term + label_dependent_weight * stable_softplus
|
|
|
|
|
def weighted_hinge_loss(labels,
                        logits,
                        positive_weights=1.0,
                        negative_weights=1.0,
                        name=None):
  """Computes hinge loss with separate positive/negative class weights.

  The loss applies to multi-label classification tasks where labels are
  independent and not mutually exclusive. See also
  `weighted_sigmoid_cross_entropy_with_logits`.

  Args:
    labels: A `Tensor` of type `float32` or `float64`. Each entry must be
      either 0 or 1. `labels` can be a 2D tensor with shape
      [batch_size, num_labels] or a 3D tensor with shape
      [batch_size, num_labels, K].
    logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
      shape [batch_size, num_labels, K], the loss is computed separately on
      each slice [:, :, k] of `logits`.
    positive_weights: A `Tensor` that holds positive weights and has the
      following semantics according to its shape:
        scalar - A global positive weight.
        1D tensor - must be of size K, a weight for each 'attempt'
        2D tensor - of size [num_labels, K'] where K' is either K or 1.
      The `positive_weights` will be expanded to the left to match the
      dimensions of logits and labels.
    negative_weights: A `Tensor` that holds positive weight and has the
      semantics identical to positive_weights.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    weighted hinge loss.
  """
  with tf.name_scope(
      name, 'weighted_hinge_loss',
      [logits, labels, positive_weights, negative_weights]) as name:
    labels, logits, positive_weights, negative_weights = prepare_loss_args(
        labels, logits, positive_weights, negative_weights)

    # Margin violation for positives occurs when logits < 1; for negatives
    # when logits > -1.
    positive_hinge = tf.maximum(1.0 - logits, 0)
    negative_hinge = tf.maximum(1.0 + logits, 0)
    weighted_positives = positive_weights * labels * positive_hinge
    weighted_negatives = negative_weights * (1.0 - labels) * negative_hinge
    return weighted_positives + weighted_negatives
|
|
|
|
|
def weighted_surrogate_loss(labels,
                            logits,
                            surrogate_type='xent',
                            positive_weights=1.0,
                            negative_weights=1.0,
                            name=None):
  """Returns either weighted cross-entropy or hinge loss.

  Dispatches on `surrogate_type`: 'xent' returns the weighted sigmoid cross
  entropy loss and 'hinge' returns the weighted hinge loss.

  Args:
    labels: A `Tensor` of type `float32` or `float64`. Each entry must be
      between 0 and 1. `labels` can be a 2D tensor with shape
      [batch_size, num_labels] or a 3D tensor with shape
      [batch_size, num_labels, K].
    logits: A `Tensor` of the same type and shape as `labels`. If `logits` has
      shape [batch_size, num_labels, K], each slice [:, :, k] represents an
      'attempt' to predict `labels` and the loss is computed per slice.
    surrogate_type: A string that determines which loss to return, supports
      'xent' for cross-entropy and 'hinge' for hinge loss.
    positive_weights: A `Tensor` that holds positive weights and has the
      following semantics according to its shape:
        scalar - A global positive weight.
        1D tensor - must be of size K, a weight for each 'attempt'
        2D tensor - of size [num_labels, K'] where K' is either K or 1.
      The `positive_weights` will be expanded to the left to match the
      dimensions of logits and labels.
    negative_weights: A `Tensor` that holds positive weight and has the
      semantics identical to positive_weights.
    name: A name for the operation (optional).

  Returns:
    The weighted loss.

  Raises:
    ValueError: If value of `surrogate_type` is not supported.
  """
  with tf.name_scope(
      name, 'weighted_loss',
      [logits, labels, surrogate_type, positive_weights,
       negative_weights]) as name:
    surrogate_fns = {
        'xent': weighted_sigmoid_cross_entropy_with_logits,
        'hinge': weighted_hinge_loss,
    }
    if surrogate_type not in surrogate_fns:
      raise ValueError('surrogate_type %s not supported.' % surrogate_type)
    return surrogate_fns[surrogate_type](
        logits=logits,
        labels=labels,
        positive_weights=positive_weights,
        negative_weights=negative_weights,
        name=name)
|
|
|
|
|
def expand_outer(tensor, rank):
  """Expands the given `Tensor` outwards to a target rank.

  For example if rank = 3 and tensor.shape is [3, 4], this function will
  expand the tensor such that the resulting shape will be [1, 3, 4].

  Args:
    tensor: The tensor to expand.
    rank: The target dimension.

  Returns:
    The expanded tensor.

  Raises:
    ValueError: If rank of `tensor` is unknown, or if `rank` is smaller than
      the rank of `tensor`.
  """
  current_rank = tensor.get_shape().ndims
  if current_rank is None:
    raise ValueError('tensor dimension must be known.')
  if current_rank > rank:
    raise ValueError(
        '`rank` must be at least the current tensor dimension: (%s vs %s).' %
        (rank, current_rank))
  # Prepend unit dimensions one at a time until the target rank is reached.
  for _ in range(rank - current_rank):
    tensor = tf.expand_dims(tensor, 0)
  return tensor
|
|
|
|
|
def build_label_priors(labels,
                       weights=None,
                       positive_pseudocount=1.0,
                       negative_pseudocount=1.0,
                       variables_collections=None):
  """Creates an op to maintain and update label prior probabilities.

  For each label, the label priors are estimated as
      (P + sum_i w_i y_i) / (P + N + sum_i w_i),
  where y_i is the ith label, w_i is the ith weight, P is a pseudo-count of
  positive labels, and N is a pseudo-count of negative labels. The index i
  ranges over all labels observed during all evaluations of the returned op.

  Args:
    labels: A `Tensor` with shape [batch_size, num_labels]. Entries should be
      in [0, 1].
    weights: Coefficients representing the weight of each label. Must be either
      a Tensor of shape [batch_size, num_labels] or `None`, in which case each
      weight is treated as 1.0.
    positive_pseudocount: Number of positive labels used to initialize the label
      priors.
    negative_pseudocount: Number of negative labels used to initialize the label
      priors.
    variables_collections: Optional list of collections for created variables.

  Returns:
    label_priors: An op to update the weighted label_priors. Gives the
      current value of the label priors when evaluated.
  """
  dtype = labels.dtype.base_dtype
  num_labels = get_num_labels(labels)

  # Unweighted case: every observed label counts with weight 1.0.
  if weights is None:
    weights = tf.ones_like(labels)

  # Temporarily disable any variable partitioner while the accumulator
  # variables are created, restoring it afterwards — presumably so the small
  # per-label vectors are not sharded (NOTE(review): confirm intent).
  partitioner = tf.get_variable_scope().partitioner
  try:
    tf.get_variable_scope().set_partitioner(None)

    # Running weighted count of positive observations per label,
    # initialized with the positive pseudo-count P.
    weighted_label_counts = tf.contrib.framework.model_variable(
        name='weighted_label_counts',
        shape=[num_labels],
        dtype=dtype,
        initializer=tf.constant_initializer(
            [positive_pseudocount] * num_labels, dtype=dtype),
        collections=variables_collections,
        trainable=False)
    weighted_label_counts_update = weighted_label_counts.assign_add(
        tf.reduce_sum(weights * labels, 0))

    # Running sum of weights per label, initialized with P + N so the
    # initial prior is P / (P + N).
    weight_sum = tf.contrib.framework.model_variable(
        name='weight_sum',
        shape=[num_labels],
        dtype=dtype,
        initializer=tf.constant_initializer(
            [positive_pseudocount + negative_pseudocount] * num_labels,
            dtype=dtype),
        collections=variables_collections,
        trainable=False)
    weight_sum_update = weight_sum.assign_add(tf.reduce_sum(weights, 0))

  finally:
    tf.get_variable_scope().set_partitioner(partitioner)

  # Evaluating this ratio runs both assign_add updates (its inputs) and then
  # yields the refreshed prior estimate.
  label_priors = tf.div(
      weighted_label_counts_update,
      weight_sum_update)
  return label_priors
|
|
|
|
|
def convert_and_cast(value, name, dtype):
  """Converts `value` to a `Tensor` named `name` and casts it to `dtype`.

  Args:
    value: An object whose type has a registered Tensor conversion function,
      e.g. python numerical type or numpy array.
    name: Name to use for the new Tensor, if one is created.
    dtype: Optional element type for the returned tensor.

  Returns:
    A tensor of element type `dtype`.
  """
  tensor = tf.convert_to_tensor(value, name=name)
  return tf.cast(tensor, dtype=dtype)
|
|
|
|
|
def prepare_loss_args(labels, logits, positive_weights, negative_weights):
  """Prepares arguments for the weighted loss functions.

  Converts each argument to a tensor with the dtype of `logits`, aligns 2D
  labels with 3D logits, and expands both weight tensors on the left to the
  rank of `logits`.

  Args:
    labels: labels or labels of the loss function.
    logits: Logits of the loss function.
    positive_weights: Weight on the positive examples.
    negative_weights: Weight on the negative examples.

  Returns:
    Converted labels, logits, positive_weights, negative_weights.
  """
  logits = tf.convert_to_tensor(logits, name='logits')
  labels = convert_and_cast(labels, 'labels', logits.dtype)
  # When logits carry an extra 'attempts' dimension K, give labels a trailing
  # unit dimension so they broadcast across all K slices.
  if len(labels.get_shape()) == 2 and len(logits.get_shape()) == 3:
    labels = tf.expand_dims(labels, [2])

  target_rank = logits.get_shape().ndims
  positive_weights = expand_outer(
      convert_and_cast(positive_weights, 'positive_weights', logits.dtype),
      target_rank)
  negative_weights = expand_outer(
      convert_and_cast(negative_weights, 'negative_weights', logits.dtype),
      target_rank)
  return labels, logits, positive_weights, negative_weights
|
|
|
|
|
def get_num_labels(labels_or_logits):
  """Returns the number of labels inferred from labels_or_logits.

  Rank-0 and rank-1 tensors are treated as carrying a single label;
  otherwise the size of the second (num_labels) dimension is returned.
  """
  shape = labels_or_logits.get_shape()
  return 1 if shape.ndims <= 1 else shape[1].value
|
|