Spaces:
Runtime error
Runtime error
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""
This code is adapted from:
https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
"""
import paddle | |
from paddle import nn | |
from paddle.nn import functional as F | |
import numpy as np | |
from ppocr.utils.iou import iou | |
class PSELoss(nn.Layer):
    """Dice-based loss over a text map and a stack of kernel maps.

    ``forward`` splits the predicted maps into channel 0 (the text map) and
    channels 1.. (the kernel maps), computes a dice loss for each against
    its ground truth, and blends them as
    ``alpha * loss_text + (1 - alpha) * loss_kernels``.
    """

    def __init__(self,
                 alpha,
                 ohem_ratio=3,
                 kernel_sample_mask='pred',
                 reduction='sum',
                 eps=1e-6,
                 **kwargs):
        """Store the loss configuration.

        Args:
            alpha: Weight of the text loss; the kernel loss gets
                ``1 - alpha``.
            ohem_ratio: Upper bound on the negative:positive pixel ratio
                kept by online hard example mining for the text loss.
            kernel_sample_mask: ``'gt'`` masks the kernel loss with the
                ground-truth text region, ``'pred'`` with the predicted
                text region (sigmoid > 0.5). Any other value leaves the
                OHEM mask of the text loss in effect (see ``forward``).
            reduction: ``'sum'``, ``'mean'`` or ``'none'``; how every entry
                of the returned dict is reduced.
            eps: Added to both dice denominators terms to avoid division
                by zero.
            **kwargs: Accepted and ignored.
        """
        super(PSELoss, self).__init__()
        assert reduction in ['sum', 'mean', 'none']
        self.alpha = alpha
        self.ohem_ratio = ohem_ratio
        self.kernel_sample_mask = kernel_sample_mask
        self.reduction = reduction
        self.eps = eps

    def forward(self, outputs, labels):
        """Compute text/kernel losses and IoU metrics.

        Args:
            outputs: Mapping whose ``'maps'`` entry holds the predicted
                maps; they are upsampled by a factor of 4 before use.
            labels: Sequence whose elements after the first are, in order,
                ``gt_texts``, ``gt_kernels`` and ``training_masks``. The
                first element is not used here.

        Returns:
            dict with keys ``loss_text``, ``iou_text``, ``loss_kernels``,
            ``iou_kernel`` and ``loss``; each value is reduced according to
            ``self.reduction`` (left untouched for ``'none'``).
        """
        predicts = F.interpolate(outputs['maps'], scale_factor=4)

        texts = predicts[:, 0, :, :]
        kernels = predicts[:, 1:, :, :]
        gt_texts, gt_kernels, training_masks = labels[1:]

        # text loss
        # Pass the configured ratio explicitly: relying on ohem_batch's
        # default would silently ignore the ``ohem_ratio`` constructor arg.
        selected_masks = self.ohem_batch(texts, gt_texts, training_masks,
                                         self.ohem_ratio)
        loss_text = self.dice_loss(texts, gt_texts, selected_masks)
        iou_text = iou((texts > 0).astype('int64'),
                       gt_texts,
                       training_masks,
                       reduce=False)
        losses = dict(loss_text=loss_text, iou_text=iou_text)

        # kernel loss
        # NOTE(review): for any other ``kernel_sample_mask`` value the OHEM
        # mask computed above is reused for the kernels; this fallback is
        # kept as-is for backward compatibility.
        if self.kernel_sample_mask == 'gt':
            selected_masks = gt_texts * training_masks
        elif self.kernel_sample_mask == 'pred':
            selected_masks = (
                F.sigmoid(texts) > 0.5).astype('float32') * training_masks

        loss_kernels = [
            self.dice_loss(kernels[:, i, :, :], gt_kernels[:, i, :, :],
                           selected_masks) for i in range(kernels.shape[1])
        ]
        # Average the per-kernel dice losses for each sample.
        loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1)
        # The kernel IoU is reported on the last kernel map only.
        iou_kernel = iou((kernels[:, -1, :, :] > 0).astype('int64'),
                         gt_kernels[:, -1, :, :],
                         training_masks * gt_texts,
                         reduce=False)
        losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel))

        loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels
        losses['loss'] = loss

        if self.reduction == 'sum':
            losses = {x: paddle.sum(v) for x, v in losses.items()}
        elif self.reduction == 'mean':
            losses = {x: paddle.mean(v) for x, v in losses.items()}
        return losses

    def dice_loss(self, input, target, mask):
        """Return ``1 - dice`` per sample between ``sigmoid(input)`` and ``target``.

        All three arguments are flattened to ``[batch, -1]`` and both
        ``input`` and ``target`` are multiplied by ``mask`` before the
        overlap is computed.
        """
        input = F.sigmoid(input)

        input = input.reshape([input.shape[0], -1])
        target = target.reshape([target.shape[0], -1])
        mask = mask.reshape([mask.shape[0], -1])

        input = input * mask
        target = target * mask

        a = paddle.sum(input * target, 1)
        b = paddle.sum(input * input, 1) + self.eps
        c = paddle.sum(target * target, 1) + self.eps
        d = (2 * a) / (b + c)
        return 1 - d

    @staticmethod
    def _as_batched_float_mask(mask):
        """Reshape a 2-D mask to ``[1, H, W]`` and cast it to float32."""
        return mask.reshape([1, mask.shape[0], mask.shape[1]]).astype(
            'float32')

    def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
        """Build the online-hard-example-mining mask for one sample.

        Keeps every positive pixel (``gt_text > 0.5``) and the
        highest-scoring negatives, at most ``ohem_ratio`` negatives per
        positive, restricted to ``training_mask > 0.5``. When there are no
        usable positives or no negatives to pick, ``training_mask`` itself
        is returned.

        Returns:
            float32 mask of shape ``[1, H, W]``.
        """
        # Positives that are not excluded by the training mask.
        pos_num = int(paddle.sum((gt_text > 0.5).astype('float32'))) - int(
            paddle.sum(
                paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5))
                .astype('float32')))
        if pos_num == 0:
            return self._as_batched_float_mask(training_mask)

        neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32')))
        neg_num = int(min(pos_num * ohem_ratio, neg_num))
        if neg_num == 0:
            return self._as_batched_float_mask(training_mask)

        # Sorting the negated scores ascending orders the negatives from
        # highest to lowest score; entry ``neg_num - 1`` is the cut-off.
        neg_score = paddle.masked_select(score, gt_text <= 0.5)
        neg_score_sorted = paddle.sort(-neg_score)
        threshold = -neg_score_sorted[neg_num - 1]

        selected_mask = paddle.logical_and(
            paddle.logical_or((score >= threshold), (gt_text > 0.5)),
            (training_mask > 0.5))
        return self._as_batched_float_mask(selected_mask)

    def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3):
        """Apply ``ohem_single`` to every sample and concatenate on axis 0."""
        selected_masks = [
            self.ohem_single(scores[i, :, :], gt_texts[i, :, :],
                             training_masks[i, :, :], ohem_ratio)
            for i in range(scores.shape[0])
        ]
        return paddle.concat(selected_masks, 0).astype('float32')