# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import L1Loss
from paddle.nn import MSELoss as L2Loss
from paddle.nn import SmoothL1Loss


class CELoss(nn.Layer):
    """
    Cross-entropy loss with optional label smoothing.
    epsilon: smoothing factor in (0, 1); values outside that range (or None)
        disable smoothing.
    """

    def __init__(self, epsilon=None):
        super().__init__()
        if epsilon is not None and (epsilon <= 0 or epsilon >= 1):
            epsilon = None
        self.epsilon = epsilon

    def _labelsmoothing(self, target, class_num):
        if target.shape[-1] != class_num:
            one_hot_target = F.one_hot(target, class_num)
        else:
            one_hot_target = target
        soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon)
        soft_target = paddle.reshape(soft_target, shape=[-1, class_num])
        return soft_target

    def forward(self, x, label):
        if self.epsilon is not None:
            # label smoothing branch: per-sample smoothed cross-entropy
            class_num = x.shape[-1]
            label = self._labelsmoothing(label, class_num)
            x = -F.log_softmax(x, axis=-1)
            loss = paddle.sum(x * label, axis=-1)
        else:
            if label.shape[-1] == x.shape[-1]:
                label = F.softmax(label, axis=-1)
                soft_label = True
            else:
                soft_label = False
            loss = F.cross_entropy(x, label=label, soft_label=soft_label)
        return loss
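

# Usage sketch (editorial addition, not part of the original file): a minimal
# illustration of the two label formats CELoss accepts; the batch size and
# class count below are arbitrary assumptions.
def _celoss_usage_sketch():
    logits = paddle.randn([8, 37])                  # [batch, num_classes]
    hard_labels = paddle.randint(0, 37, shape=[8])  # integer class ids
    smoothed = CELoss(epsilon=0.1)(logits, hard_labels)  # per-sample loss, shape [8]
    plain = CELoss()(logits, hard_labels)                # mean cross-entropy, scalar
    return smoothed, plain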


class KLJSLoss(object):
    def __init__(self, mode='kl'):
        assert mode in ['kl', 'js', 'KL', 'JS'], \
            "mode can only be one of ['kl', 'KL', 'js', 'JS']"
        self.mode = mode

    def __call__(self, p1, p2, reduction="mean", eps=1e-5):
        if self.mode.lower() == 'kl':
            # symmetric KL divergence, averaged over both directions
            loss = paddle.multiply(p2,
                                   paddle.log((p2 + eps) / (p1 + eps) + eps))
            loss += paddle.multiply(p1,
                                    paddle.log((p1 + eps) / (p2 + eps) + eps))
            loss *= 0.5
        elif self.mode.lower() == "js":
            # Jensen-Shannon style divergence against the mixture (p1 + p2)
            loss = paddle.multiply(
                p2, paddle.log((2 * p2 + eps) / (p1 + p2 + eps) + eps))
            loss += paddle.multiply(
                p1, paddle.log((2 * p1 + eps) / (p1 + p2 + eps) + eps))
            loss *= 0.5
        else:
            raise ValueError(
                "The mode.lower() of KLJSLoss should be one of ['kl', 'js']")

        if reduction == "mean":
            loss = paddle.mean(loss, axis=[1, 2])
        elif reduction == "none" or reduction is None:
            return loss
        else:
            loss = paddle.sum(loss, axis=[1, 2])
        return loss
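

# Usage sketch (editorial addition, not part of the original file): KLJSLoss
# operates on two probability maps; with reduction "mean"/"sum" it collapses
# axes 1 and 2, so inputs are assumed to be [batch, seq_len, num_classes].
def _kljs_usage_sketch():
    p1 = F.softmax(paddle.randn([4, 25, 37]), axis=-1)
    p2 = F.softmax(paddle.randn([4, 25, 37]), axis=-1)
    sym_kl = KLJSLoss(mode="kl")(p1, p2)  # symmetric KL, shape [4]
    js = KLJSLoss(mode="js")(p1, p2)      # JS-style divergence, shape [4]
    return sym_kl, js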


class DMLLoss(nn.Layer):
    """
    DMLLoss (deep mutual learning loss): symmetric divergence between the
    outputs of two peer networks.
    """

    def __init__(self, act=None, use_log=False):
        super().__init__()
        if act is not None:
            assert act in ["softmax", "sigmoid"]
        if act == "softmax":
            self.act = nn.Softmax(axis=-1)
        elif act == "sigmoid":
            self.act = nn.Sigmoid()
        else:
            self.act = None

        self.use_log = use_log
        self.jskl_loss = KLJSLoss(mode="kl")

    def _kldiv(self, x, target):
        eps = 1.0e-10
        loss = target * (paddle.log(target + eps) - x)
        # batch mean loss
        loss = paddle.sum(loss) / loss.shape[0]
        return loss

    def forward(self, out1, out2):
        if self.act is not None:
            out1 = self.act(out1) + 1e-10
            out2 = self.act(out2) + 1e-10
        if self.use_log:
            # for recognition distillation, log is needed for feature map
            log_out1 = paddle.log(out1)
            log_out2 = paddle.log(out2)
            loss = (
                self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0
        else:
            # for detection distillation log is not needed
            loss = self.jskl_loss(out1, out2)
        return loss
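

# Usage sketch (editorial addition, not part of the original file): mutual
# distillation between two peer heads; shapes are assumptions. With
# use_log=True the symmetric KL in _kldiv is used, otherwise KLJSLoss.
def _dml_usage_sketch():
    out_a = paddle.randn([4, 25, 37])
    out_b = paddle.randn([4, 25, 37])
    rec_loss = DMLLoss(act="softmax", use_log=True)(out_a, out_b)   # scalar
    det_loss = DMLLoss(act="sigmoid", use_log=False)(out_a, out_b)  # shape [4]
    return rec_loss, det_loss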


class DistanceLoss(nn.Layer):
    """
    DistanceLoss:
        mode: loss mode, one of "l1", "l2" (MSE) or "smooth_l1"
    """

    def __init__(self, mode="l2", **kargs):
        super().__init__()
        assert mode in ["l1", "l2", "smooth_l1"]
        if mode == "l1":
            self.loss_func = nn.L1Loss(**kargs)
        elif mode == "l2":
            self.loss_func = nn.MSELoss(**kargs)
        elif mode == "smooth_l1":
            self.loss_func = nn.SmoothL1Loss(**kargs)

    def forward(self, x, y):
        return self.loss_func(x, y)
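

# Usage sketch (editorial addition, not part of the original file):
# element-wise regression between two feature maps; shapes are assumptions.
def _distance_usage_sketch():
    feat_s = paddle.randn([4, 64, 32, 80])
    feat_t = paddle.randn([4, 64, 32, 80])
    return DistanceLoss(mode="l2")(feat_s, feat_t)  # mean squared error, scalar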


class LossFromOutput(nn.Layer):
    def __init__(self, key='loss', reduction='none'):
        super().__init__()
        self.key = key
        self.reduction = reduction

    def forward(self, predicts, batch):
        loss = predicts
        if self.key is not None and isinstance(predicts, dict):
            loss = loss[self.key]
        if self.reduction == 'mean':
            loss = paddle.mean(loss)
        elif self.reduction == 'sum':
            loss = paddle.sum(loss)
        return {'loss': loss}
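

# Usage sketch (editorial addition, not part of the original file): extracts a
# loss the model already computed and returned in its output dict; the dict
# contents below are illustrative.
def _loss_from_output_usage_sketch():
    predicts = {'loss': paddle.to_tensor([0.5, 1.5])}
    result = LossFromOutput(key='loss', reduction='mean')(predicts, batch=None)
    return result  # {'loss': Tensor(1.0)}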


class KLDivLoss(nn.Layer):
    """
    KLDivLoss: KL divergence between a student's log-probabilities and a
    teacher's probabilities, with an optional mask over valid positions.
    """

    def __init__(self):
        super().__init__()

    def _kldiv(self, x, target, mask=None):
        eps = 1.0e-10
        loss = target * (paddle.log(target + eps) - x)
        if mask is not None:
            # flatten batch and sequence axes, then keep only masked positions
            loss = loss.flatten(0, 1).sum(axis=1)
            loss = loss.masked_select(mask).mean()
        else:
            # batch mean loss
            loss = paddle.sum(loss) / loss.shape[0]
        return loss

    def forward(self, logits_s, logits_t, mask=None):
        log_out_s = F.log_softmax(logits_s, axis=-1)
        out_t = F.softmax(logits_t, axis=-1)
        loss = self._kldiv(log_out_s, out_t, mask)
        return loss
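

# Usage sketch (editorial addition, not part of the original file):
# student/teacher KL over logits; the optional boolean mask selects valid
# positions after the batch and sequence axes are flattened together.
def _kldivloss_usage_sketch():
    logits_s = paddle.randn([4, 25, 37])
    logits_t = paddle.randn([4, 25, 37])
    valid = paddle.ones([4 * 25], dtype='bool')  # assumed: every position valid
    return KLDivLoss()(logits_s, logits_t, valid)  # scalar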


class DKDLoss(nn.Layer):
    """
    DKDLoss (decoupled knowledge distillation loss): splits the KD objective
    into a target-class term (TCKD) and a non-target-class term (NCKD),
    weighted by alpha and beta respectively.
    """

    def __init__(self, temperature=1.0, alpha=1.0, beta=1.0):
        super().__init__()
        self.temperature = temperature
        self.alpha = alpha
        self.beta = beta

    def _cat_mask(self, t, mask1, mask2):
        # collapse the class dimension into two bins: target vs. non-target mass
        t1 = (t * mask1).sum(axis=1, keepdim=True)
        t2 = (t * mask2).sum(axis=1, keepdim=True)
        rt = paddle.concat([t1, t2], axis=1)
        return rt

    def _kl_div(self, x, label, mask=None):
        y = (label * (paddle.log(label + 1e-10) - x)).sum(axis=1)
        if mask is not None:
            y = y.masked_select(mask).mean()
        else:
            y = y.mean()
        return y

    def forward(self, logits_student, logits_teacher, target, mask=None):
        gt_mask = F.one_hot(
            target.reshape([-1]), num_classes=logits_student.shape[-1])
        other_mask = 1 - gt_mask
        logits_student = logits_student.flatten(0, 1)
        logits_teacher = logits_teacher.flatten(0, 1)
        pred_student = F.softmax(logits_student / self.temperature, axis=1)
        pred_teacher = F.softmax(logits_teacher / self.temperature, axis=1)
        pred_student = self._cat_mask(pred_student, gt_mask, other_mask)
        pred_teacher = self._cat_mask(pred_teacher, gt_mask, other_mask)
        log_pred_student = paddle.log(pred_student)
        # target-class knowledge distillation (TCKD)
        tckd_loss = self._kl_div(log_pred_student,
                                 pred_teacher) * (self.temperature**2)
        # non-target-class knowledge distillation (NCKD): suppress the ground
        # truth class with a large negative offset before the softmax
        pred_teacher_part2 = F.softmax(
            logits_teacher / self.temperature - 1000.0 * gt_mask, axis=1)
        log_pred_student_part2 = F.log_softmax(
            logits_student / self.temperature - 1000.0 * gt_mask, axis=1)
        nckd_loss = self._kl_div(log_pred_student_part2,
                                 pred_teacher_part2) * (self.temperature**2)
        loss = self.alpha * tckd_loss + self.beta * nckd_loss
        return loss
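

# Usage sketch (editorial addition, not part of the original file): decoupled
# KD on sequence logits. Integer targets per time step are one-hot encoded
# inside forward, and the batch/sequence axes are flattened to match. The
# temperature and alpha/beta weights below are arbitrary assumptions.
def _dkd_usage_sketch():
    logits_s = paddle.randn([4, 25, 37])
    logits_t = paddle.randn([4, 25, 37])
    target = paddle.randint(0, 37, shape=[4, 25])
    return DKDLoss(temperature=4.0, alpha=1.0, beta=2.0)(logits_s, logits_t,
                                                         target)  # scalar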