# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr

from .rec_att_head import AttentionGRUCell


def get_para_bias_attr(l2_decay, k):
    """Build a weight/bias ParamAttr pair for a Linear layer.

    When ``l2_decay`` is positive, both parameters share an L2 regularizer
    and a uniform initializer scaled by the fan-in ``k``; otherwise the
    framework defaults are used.
    """
    if l2_decay > 0:
        regularizer = paddle.regularizer.L2Decay(l2_decay)
        stdv = 1.0 / math.sqrt(k * 1.0)
        initializer = nn.initializer.Uniform(-stdv, stdv)
    else:
        regularizer = None
        initializer = None
    weight_attr = ParamAttr(regularizer=regularizer, initializer=initializer)
    bias_attr = ParamAttr(regularizer=regularizer, initializer=initializer)
    return [weight_attr, bias_attr]
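
# A minimal usage sketch (illustrative only; the layer sizes are arbitrary
# assumptions, not part of this module): build a Linear layer whose weight
# and bias share an L2 regularizer and a fan-in-scaled uniform initializer.
#
#   weight_attr, bias_attr = get_para_bias_attr(l2_decay=1e-5, k=256)
#   fc = nn.Linear(256, 128, weight_attr=weight_attr, bias_attr=bias_attr)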


class TableAttentionHead(nn.Layer):
    def __init__(self,
                 in_channels,
                 hidden_size,
                 in_max_len=488,
                 max_text_length=800,
                 out_channels=30,
                 loc_reg_num=4,
                 **kwargs):
        super(TableAttentionHead, self).__init__()
        self.input_size = in_channels[-1]
        self.hidden_size = hidden_size
        self.out_channels = out_channels
        self.max_text_length = max_text_length

        self.structure_attention_cell = AttentionGRUCell(
            self.input_size, hidden_size, self.out_channels, use_gru=False)
        self.structure_generator = nn.Linear(hidden_size, self.out_channels)
        self.in_max_len = in_max_len
        # The input size of loc_fea_trans is the length of the flattened
        # feature sequence, which is determined by in_max_len.
        if self.in_max_len == 640:
            self.loc_fea_trans = nn.Linear(400, self.max_text_length + 1)
        elif self.in_max_len == 800:
            self.loc_fea_trans = nn.Linear(625, self.max_text_length + 1)
        else:
            self.loc_fea_trans = nn.Linear(256, self.max_text_length + 1)
        self.loc_generator = nn.Linear(self.input_size + hidden_size,
                                       loc_reg_num)

    def _char_to_onehot(self, input_char, onehot_dim):
        input_one_hot = F.one_hot(input_char, onehot_dim)
        return input_one_hot
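
    # Shape sketch (illustrative): for a batch of previous-step labels of
    # shape [batch_size], F.one_hot returns [batch_size, onehot_dim], e.g.
    #
    #   self._char_to_onehot(paddle.zeros([8], dtype="int64"), onehot_dim=30)
    #   # -> tensor of shape [8, 30]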

    def forward(self, inputs, targets=None):
        fea = inputs[-1]
        # Flatten the spatial dimensions: (N, C, H, W) -> (N, C, H*W).
        last_shape = int(np.prod(fea.shape[2:]))
        fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape])
        fea = fea.transpose([0, 2, 1])  # (NTC): (batch, width, channels)
        batch_size = fea.shape[0]

        hidden = paddle.zeros((batch_size, self.hidden_size))
        output_hiddens = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.hidden_size))
        # Both branches must assign the same variables: when the model is
        # exported to a static graph, a variable modified in only one branch
        # does not keep the modification.
        if self.training and targets is not None:
            # Teacher forcing: feed the ground-truth structure tokens.
            structure = targets[0]
            for i in range(self.max_text_length + 1):
                elem_onehots = self._char_to_onehot(
                    structure[:, i], onehot_dim=self.out_channels)
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots)
                output_hiddens[:, i, :] = outputs
            structure_probs = self.structure_generator(output_hiddens)
            loc_fea = fea.transpose([0, 2, 1])
            loc_fea = self.loc_fea_trans(loc_fea)
            loc_fea = loc_fea.transpose([0, 2, 1])
            loc_concat = paddle.concat([output_hiddens, loc_fea], axis=2)
            loc_preds = self.loc_generator(loc_concat)
            loc_preds = F.sigmoid(loc_preds)
        else:
            # Greedy decoding: feed back the argmax prediction of each step.
            temp_elem = paddle.zeros(shape=[batch_size], dtype="int32")
            structure_probs = None
            loc_preds = None
            elem_onehots = None
            outputs = None
            alpha = None
            max_text_length = paddle.to_tensor(self.max_text_length)
            for i in range(max_text_length + 1):
                elem_onehots = self._char_to_onehot(
                    temp_elem, onehot_dim=self.out_channels)
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots)
                output_hiddens[:, i, :] = outputs
                structure_probs_step = self.structure_generator(outputs)
                temp_elem = structure_probs_step.argmax(axis=1, dtype="int32")
            structure_probs = self.structure_generator(output_hiddens)
            structure_probs = F.softmax(structure_probs)
            loc_fea = fea.transpose([0, 2, 1])
            loc_fea = self.loc_fea_trans(loc_fea)
            loc_fea = loc_fea.transpose([0, 2, 1])
            loc_concat = paddle.concat([output_hiddens, loc_fea], axis=2)
            loc_preds = self.loc_generator(loc_concat)
            loc_preds = F.sigmoid(loc_preds)
        return {'structure_probs': structure_probs, 'loc_preds': loc_preds}
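
# A minimal inference sketch (illustrative; the sizes are example assumptions,
# not fixed by the module). With the default in_max_len=488, loc_fea_trans
# expects a flattened feature length of 256, i.e. H*W = 256 here.
#
#   head = TableAttentionHead(in_channels=[96], hidden_size=256,
#                             max_text_length=800, out_channels=30)
#   head.eval()
#   fea = paddle.rand([1, 96, 16, 16])  # (N, C, H, W), so H*W = 256
#   out = head([fea])
#   # out['structure_probs']: [1, 801, 30], out['loc_preds']: [1, 801, 4]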


class SLAHead(nn.Layer):
    def __init__(self,
                 in_channels,
                 hidden_size,
                 out_channels=30,
                 max_text_length=500,
                 loc_reg_num=4,
                 fc_decay=0.0,
                 **kwargs):
        """
        @param in_channels: input channels of the backbone feature map
        @param hidden_size: hidden size of the RNN and the embedding
        @param out_channels: number of structure classes to recognize
        @param max_text_length: maximum number of decoding steps
        @param loc_reg_num: number of regression values per cell box
        @param fc_decay: L2 decay applied to the fully connected layers
        """
        super().__init__()
        in_channels = in_channels[-1]
        self.hidden_size = hidden_size
        self.max_text_length = max_text_length
        self.emb = self._char_to_onehot
        self.num_embeddings = out_channels
        self.loc_reg_num = loc_reg_num

        # structure branch
        self.structure_attention_cell = AttentionGRUCell(
            in_channels, hidden_size, self.num_embeddings)
        weight_attr, bias_attr = get_para_bias_attr(
            l2_decay=fc_decay, k=hidden_size)
        weight_attr1_1, bias_attr1_1 = get_para_bias_attr(
            l2_decay=fc_decay, k=hidden_size)
        weight_attr1_2, bias_attr1_2 = get_para_bias_attr(
            l2_decay=fc_decay, k=hidden_size)
        self.structure_generator = nn.Sequential(
            nn.Linear(
                self.hidden_size,
                self.hidden_size,
                weight_attr=weight_attr1_2,
                bias_attr=bias_attr1_2),
            nn.Linear(
                hidden_size,
                out_channels,
                weight_attr=weight_attr,
                bias_attr=bias_attr))
        # location branch
        weight_attr1, bias_attr1 = get_para_bias_attr(
            l2_decay=fc_decay, k=self.hidden_size)
        weight_attr2, bias_attr2 = get_para_bias_attr(
            l2_decay=fc_decay, k=self.hidden_size)
        self.loc_generator = nn.Sequential(
            nn.Linear(
                self.hidden_size,
                self.hidden_size,
                weight_attr=weight_attr1,
                bias_attr=bias_attr1),
            nn.Linear(
                self.hidden_size,
                loc_reg_num,
                weight_attr=weight_attr2,
                bias_attr=bias_attr2),
            nn.Sigmoid())
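
    # Design note: the structure branch is a two-layer MLP over the GRU
    # hidden state, while the location branch ends with nn.Sigmoid(), so the
    # predicted box coordinates are squashed into [0, 1]. Interpreting them
    # as coordinates relative to the input image size is an assumption
    # consistent with typical table-recognition training targets.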

    def forward(self, inputs, targets=None):
        fea = inputs[-1]
        batch_size = fea.shape[0]
        # Flatten the spatial dimensions: (N, C, H, W) -> (N, H*W, C).
        fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], -1])
        fea = fea.transpose([0, 2, 1])  # (NTC): (batch, width, channels)

        hidden = paddle.zeros((batch_size, self.hidden_size))
        structure_preds = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.num_embeddings))
        loc_preds = paddle.zeros(
            (batch_size, self.max_text_length + 1, self.loc_reg_num))
        structure_preds.stop_gradient = True
        loc_preds.stop_gradient = True
        if self.training and targets is not None:
            # Teacher forcing: feed the ground-truth structure tokens.
            structure = targets[0]
            for i in range(self.max_text_length + 1):
                hidden, structure_step, loc_step = self._decode(structure[:, i],
                                                                fea, hidden)
                structure_preds[:, i, :] = structure_step
                loc_preds[:, i, :] = loc_step
        else:
            # Greedy decoding: feed back the argmax prediction of each step.
            pre_chars = paddle.zeros(shape=[batch_size], dtype="int32")
            max_text_length = paddle.to_tensor(self.max_text_length)
            # pre-assign for export
            loc_step, structure_step = None, None
            for i in range(max_text_length + 1):
                hidden, structure_step, loc_step = self._decode(pre_chars, fea,
                                                                hidden)
                pre_chars = structure_step.argmax(axis=1, dtype="int32")
                structure_preds[:, i, :] = structure_step
                loc_preds[:, i, :] = loc_step
        if not self.training:
            structure_preds = F.softmax(structure_preds)
        return {'structure_probs': structure_preds, 'loc_preds': loc_preds}

    def _decode(self, pre_chars, features, hidden):
        """
        Predict the table structure label and cell coordinates for one step.
        @param pre_chars: structure labels predicted in the previous step
        @param features: flattened visual features, shape (batch, width, channels)
        @param hidden: hidden state of the previous step
        @return: the updated hidden state and the per-step structure and
            location predictions
        """
        emb_feature = self.emb(pre_chars)
        # output shape: (batch_size, hidden_size)
        (output, hidden), alpha = self.structure_attention_cell(
            hidden, features, emb_feature)
        # structure branch
        structure_step = self.structure_generator(output)
        # location branch
        loc_step = self.loc_generator(output)
        return hidden, structure_step, loc_step
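
    # Single-step shape sketch (illustrative assumptions: the head was built
    # with in_channels=[96] and hidden_size=256, and there are 64 attended
    # feature positions):
    #
    #   hidden = paddle.zeros([2, 256])
    #   features = paddle.rand([2, 64, 96])
    #   pre_chars = paddle.zeros([2], dtype="int32")
    #   hidden, structure_step, loc_step = head._decode(pre_chars, features, hidden)
    #   # structure_step: [2, 30], loc_step: [2, 4]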

    def _char_to_onehot(self, input_char):
        input_one_hot = F.one_hot(input_char, self.num_embeddings)
        return input_one_hot
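
# A minimal end-to-end sketch for SLAHead (illustrative; the sizes are
# assumptions, not part of the module API):
#
#   head = SLAHead(in_channels=[96], hidden_size=256, out_channels=30,
#                  max_text_length=500)
#   head.eval()
#   fea = paddle.rand([1, 96, 16, 16])  # last backbone feature map (N, C, H, W)
#   out = head([fea])
#   # out['structure_probs']: [1, 501, 30], out['loc_preds']: [1, 501, 4]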