# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr

from .rec_att_head import AttentionGRUCell


def get_para_bias_attr(l2_decay, k):
    """Build a weight/bias ParamAttr pair for a Linear layer.

    When ``l2_decay`` is positive, both parameters share an L2 regularizer
    and a uniform initializer scaled by the fan-in ``k``; otherwise the
    framework defaults are used.
    """
    if l2_decay > 0:
        regularizer = paddle.regularizer.L2Decay(l2_decay)
        stdv = 1.0 / math.sqrt(k * 1.0)
        initializer = nn.initializer.Uniform(-stdv, stdv)
    else:
        regularizer = None
        initializer = None
    weight_attr = ParamAttr(regularizer=regularizer, initializer=initializer)
    bias_attr = ParamAttr(regularizer=regularizer, initializer=initializer)
    return [weight_attr, bias_attr]
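
# A minimal usage sketch (illustrative only; the layer sizes are arbitrary
# assumptions, not part of this module): build a Linear layer whose weight
# and bias share an L2 regularizer and a fan-in-scaled uniform initializer.
#
#   weight_attr, bias_attr = get_para_bias_attr(l2_decay=1e-5, k=256)
#   fc = nn.Linear(256, 128, weight_attr=weight_attr, bias_attr=bias_attr)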


class TableAttentionHead(nn.Layer):
    def __init__(self,
                 in_channels,
                 hidden_size,
                 in_max_len=488,
                 max_text_length=800,
                 out_channels=30,
                 loc_reg_num=4,
                 **kwargs):
        super(TableAttentionHead, self).__init__()
        self.input_size = in_channels[-1]
        self.hidden_size = hidden_size
        self.out_channels = out_channels
        self.max_text_length = max_text_length

        self.structure_attention_cell = AttentionGRUCell(
            self.input_size, hidden_size, self.out_channels, use_gru=False)
        self.structure_generator = nn.Linear(hidden_size, self.out_channels)
        self.in_max_len = in_max_len
        # The input size of loc_fea_trans is the length of the flattened
        # feature sequence, which is determined by in_max_len.
        if self.in_max_len == 640:
            self.loc_fea_trans = nn.Linear(400, self.max_text_length + 1)
        elif self.in_max_len == 800:
            self.loc_fea_trans = nn.Linear(625, self.max_text_length + 1)
        else:
            self.loc_fea_trans = nn.Linear(256, self.max_text_length + 1)
        self.loc_generator = nn.Linear(self.input_size + hidden_size,
                                       loc_reg_num)

    def _char_to_onehot(self, input_char, onehot_dim):
        input_one_hot = F.one_hot(input_char, onehot_dim)
        return input_one_hot
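
    # Shape sketch (illustrative): for a batch of previous-step labels of
    # shape [batch_size], F.one_hot returns [batch_size, onehot_dim], e.g.
    #
    #   self._char_to_onehot(paddle.zeros([8], dtype="int64"), onehot_dim=30)
    #   # -> tensor of shape [8, 30]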

    def forward(self, inputs, targets=None):
        fea = inputs[-1]
        # Flatten the spatial dimensions: (N, C, H, W) -> (N, C, H*W).
        last_shape = int(np.prod(fea.shape[2:]))
        fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape])
        fea = fea.transpose([0, 2, 1])  # (NTC): (batch, width, channels)
        batch_size = fea.shape[0]

        hidden = paddle.zeros((batch_size, self.hidden_size))
        output_hiddens = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.hidden_size))
        # Both branches must assign the same variables: when the model is
        # exported to a static graph, a variable modified in only one branch
        # does not keep the modification.
        if self.training and targets is not None:
            # Teacher forcing: feed the ground-truth structure tokens.
            structure = targets[0]
            for i in range(self.max_text_length + 1):
                elem_onehots = self._char_to_onehot(
                    structure[:, i], onehot_dim=self.out_channels)
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots)
                output_hiddens[:, i, :] = outputs
            structure_probs = self.structure_generator(output_hiddens)
            loc_fea = fea.transpose([0, 2, 1])
            loc_fea = self.loc_fea_trans(loc_fea)
            loc_fea = loc_fea.transpose([0, 2, 1])
            loc_concat = paddle.concat([output_hiddens, loc_fea], axis=2)
            loc_preds = self.loc_generator(loc_concat)
            loc_preds = F.sigmoid(loc_preds)
        else:
            # Greedy decoding: feed back the argmax prediction of each step.
            temp_elem = paddle.zeros(shape=[batch_size], dtype="int32")
            structure_probs = None
            loc_preds = None
            elem_onehots = None
            outputs = None
            alpha = None
            max_text_length = paddle.to_tensor(self.max_text_length)
            for i in range(max_text_length + 1):
                elem_onehots = self._char_to_onehot(
                    temp_elem, onehot_dim=self.out_channels)
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots)
                output_hiddens[:, i, :] = outputs
                structure_probs_step = self.structure_generator(outputs)
                temp_elem = structure_probs_step.argmax(axis=1, dtype="int32")
            structure_probs = self.structure_generator(output_hiddens)
            structure_probs = F.softmax(structure_probs)
            loc_fea = fea.transpose([0, 2, 1])
            loc_fea = self.loc_fea_trans(loc_fea)
            loc_fea = loc_fea.transpose([0, 2, 1])
            loc_concat = paddle.concat([output_hiddens, loc_fea], axis=2)
            loc_preds = self.loc_generator(loc_concat)
            loc_preds = F.sigmoid(loc_preds)
        return {'structure_probs': structure_probs, 'loc_preds': loc_preds}
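
# A minimal inference sketch (illustrative; the sizes are example assumptions,
# not fixed by the module). With the default in_max_len=488, loc_fea_trans
# expects a flattened feature length of 256, i.e. H*W = 256 here.
#
#   head = TableAttentionHead(in_channels=[96], hidden_size=256,
#                             max_text_length=800, out_channels=30)
#   head.eval()
#   fea = paddle.rand([1, 96, 16, 16])  # (N, C, H, W), so H*W = 256
#   out = head([fea])
#   # out['structure_probs']: [1, 801, 30], out['loc_preds']: [1, 801, 4]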


class SLAHead(nn.Layer):
    def __init__(self,
                 in_channels,
                 hidden_size,
                 out_channels=30,
                 max_text_length=500,
                 loc_reg_num=4,
                 fc_decay=0.0,
                 **kwargs):
        """
        @param in_channels: input channels of the backbone feature map
        @param hidden_size: hidden size of the RNN and the embedding
        @param out_channels: number of structure classes to recognize
        @param max_text_length: maximum number of decoding steps
        @param loc_reg_num: number of regression values per cell box
        @param fc_decay: L2 decay applied to the fully connected layers
        """
        super().__init__()
        in_channels = in_channels[-1]
        self.hidden_size = hidden_size
        self.max_text_length = max_text_length
        self.emb = self._char_to_onehot
        self.num_embeddings = out_channels
        self.loc_reg_num = loc_reg_num

        # structure branch
        self.structure_attention_cell = AttentionGRUCell(
            in_channels, hidden_size, self.num_embeddings)
        weight_attr, bias_attr = get_para_bias_attr(
            l2_decay=fc_decay, k=hidden_size)
        weight_attr1_1, bias_attr1_1 = get_para_bias_attr(
            l2_decay=fc_decay, k=hidden_size)
        weight_attr1_2, bias_attr1_2 = get_para_bias_attr(
            l2_decay=fc_decay, k=hidden_size)
        self.structure_generator = nn.Sequential(
            nn.Linear(
                self.hidden_size,
                self.hidden_size,
                weight_attr=weight_attr1_2,
                bias_attr=bias_attr1_2),
            nn.Linear(
                hidden_size,
                out_channels,
                weight_attr=weight_attr,
                bias_attr=bias_attr))
        # location branch
        weight_attr1, bias_attr1 = get_para_bias_attr(
            l2_decay=fc_decay, k=self.hidden_size)
        weight_attr2, bias_attr2 = get_para_bias_attr(
            l2_decay=fc_decay, k=self.hidden_size)
        self.loc_generator = nn.Sequential(
            nn.Linear(
                self.hidden_size,
                self.hidden_size,
                weight_attr=weight_attr1,
                bias_attr=bias_attr1),
            nn.Linear(
                self.hidden_size,
                loc_reg_num,
                weight_attr=weight_attr2,
                bias_attr=bias_attr2),
            nn.Sigmoid())
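
    # Design note: the structure branch is a two-layer MLP over the GRU
    # hidden state, while the location branch ends with nn.Sigmoid(), so the
    # predicted box coordinates are squashed into [0, 1]. Interpreting them
    # as coordinates relative to the input image size is an assumption
    # consistent with typical table-recognition training targets.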

    def forward(self, inputs, targets=None):
        fea = inputs[-1]
        batch_size = fea.shape[0]
        # Flatten the spatial dimensions: (N, C, H, W) -> (N, H*W, C).
        fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], -1])
        fea = fea.transpose([0, 2, 1])  # (NTC): (batch, width, channels)

        hidden = paddle.zeros((batch_size, self.hidden_size))
        structure_preds = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.num_embeddings))
        loc_preds = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.loc_reg_num))
        structure_preds.stop_gradient = True
        loc_preds.stop_gradient = True
        if self.training and targets is not None:
            # Teacher forcing: feed the ground-truth structure tokens.
            structure = targets[0]
            for i in range(self.max_text_length + 1):
                hidden, structure_step, loc_step = self._decode(structure[:, i],
                                                                fea, hidden)
                structure_preds[:, i, :] = structure_step
                loc_preds[:, i, :] = loc_step
        else:
            # Greedy decoding: feed back the argmax prediction of each step.
            pre_chars = paddle.zeros(shape=[batch_size], dtype="int32")
            max_text_length = paddle.to_tensor(self.max_text_length)
            # pre-assign for export
            loc_step, structure_step = None, None
            for i in range(max_text_length + 1):
                hidden, structure_step, loc_step = self._decode(pre_chars, fea,
                                                                hidden)
                pre_chars = structure_step.argmax(axis=1, dtype="int32")
                structure_preds[:, i, :] = structure_step
                loc_preds[:, i, :] = loc_step
        if not self.training:
            structure_preds = F.softmax(structure_preds)
        return {'structure_probs': structure_preds, 'loc_preds': loc_preds}

    def _decode(self, pre_chars, features, hidden):
        """
        Predict the table structure label and cell coordinates for one step.
        @param pre_chars: structure labels predicted in the previous step
        @param features: flattened visual features, shape (batch, width, channels)
        @param hidden: hidden state of the previous step
        @return: the updated hidden state and the per-step structure and
            location predictions
        """
        emb_feature = self.emb(pre_chars)
        # output shape: (batch_size, hidden_size)
        (output, hidden), alpha = self.structure_attention_cell(
            hidden, features, emb_feature)
        # structure branch
        structure_step = self.structure_generator(output)
        # location branch
        loc_step = self.loc_generator(output)
        return hidden, structure_step, loc_step
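
    # Single-step shape sketch (illustrative assumptions: the head was built
    # with in_channels=[96] and hidden_size=256, and there are 64 attended
    # feature positions):
    #
    #   hidden = paddle.zeros([2, 256])
    #   features = paddle.rand([2, 64, 96])
    #   pre_chars = paddle.zeros([2], dtype="int32")
    #   hidden, structure_step, loc_step = head._decode(pre_chars, features, hidden)
    #   # structure_step: [2, 30], loc_step: [2, 4]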

    def _char_to_onehot(self, input_char):
        input_one_hot = F.one_hot(input_char, self.num_embeddings)
        return input_one_hot
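
# A minimal end-to-end sketch for SLAHead (illustrative; the sizes are
# assumptions, not part of the module API):
#
#   head = SLAHead(in_channels=[96], hidden_size=256, out_channels=30,
#                  max_text_length=500)
#   head.eval()
#   fea = paddle.rand([1, 96, 16, 16])  # last backbone feature map (N, C, H, W)
#   out = head([fea])
#   # out['structure_probs']: [1, 501, 30], out['loc_preds']: [1, 501, 4]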