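"""A TorchScript LSTM with recurrent dropout.

`RecurrentDropoutLSTMCell` samples its dropout masks once per sequence and
reuses them at every time step; `LSTM` runs one or two such cells (forward
and reverse) over `PackedSequence` inputs.
"""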
from typing import Tuple, List
import torch


class RecurrentDropoutLSTMCell(torch.jit.ScriptModule):
    __constants__ = ['hidden_size']

    def __init__(self, input_size, hidden_size, dropout=0.):
        super(RecurrentDropoutLSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.dropout = dropout

        self.W_i = torch.nn.Parameter(torch.empty(hidden_size, input_size))
        self.U_i = torch.nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.W_f = torch.nn.Parameter(torch.empty(hidden_size, input_size))
        self.U_f = torch.nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.W_c = torch.nn.Parameter(torch.empty(hidden_size, input_size))
        self.U_c = torch.nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.W_o = torch.nn.Parameter(torch.empty(hidden_size, input_size))
        self.U_o = torch.nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.bias_ih = torch.nn.Parameter(torch.empty(4 * hidden_size))
        self.bias_hh = torch.nn.Parameter(torch.empty(4 * hidden_size))

        self._input_dropout_mask = torch.jit.Attribute(torch.empty((), requires_grad=False), torch.Tensor)
        self._h_dropout_mask = torch.jit.Attribute(torch.empty((), requires_grad=False), torch.Tensor)
        # The call through super(torch.jit.ScriptModule, self) is needed because
        # torch.jit.ScriptModule deletes the _register_state_dict_hook and
        # _register_load_state_dict_pre_hook methods.
        # TODO: In Torch 1.3, discontinue use of torch.jit.Attribute so that
        # the dropout masks don't end up in the state dict in the first place.
        super(torch.jit.ScriptModule, self)._register_state_dict_hook(self._hook_remove_dropout_masks_from_state_dict)
        super(torch.jit.ScriptModule, self)._register_load_state_dict_pre_hook(self._hook_add_dropout_masks_to_state_dict)

        self.reset_parameters()

    def reset_parameters(self):
        torch.nn.init.orthogonal_(self.W_i)
        torch.nn.init.orthogonal_(self.U_i)
        torch.nn.init.orthogonal_(self.W_f)
        torch.nn.init.orthogonal_(self.U_f)
        torch.nn.init.orthogonal_(self.W_c)
        torch.nn.init.orthogonal_(self.U_c)
        torch.nn.init.orthogonal_(self.W_o)
        torch.nn.init.orthogonal_(self.U_o)
        self.bias_ih.data.fill_(0.)
        # Initialize the forget gate bias to 1 so the cell keeps its memory
        # early in training.
        self.bias_ih.data[self.hidden_size:2 * self.hidden_size].fill_(1.)
        self.bias_hh.data.fill_(0.)

    # TODO: should the dropout masks be stored in the hidden state instead?
    def set_dropout_masks(self, batch_size):
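        # In training mode, sample a Bernoulli(1 - dropout) keep mask per gate
        # and per batch element; the same masks are reused at every time step
        # of the sequence (recurrent dropout). In eval mode nothing is sampled:
        # the inputs are instead scaled by the keep probability (1 - dropout).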
        def constant_mask(v):
            return torch.tensor(v).reshape(1, 1, 1).expand(4, batch_size, -1).to(self.W_i.device)

        if self.dropout:
            if self.training:
                new_tensor = self.W_i.data.new
                self._input_dropout_mask = torch.bernoulli(
                    new_tensor(4, batch_size, self.input_size).fill_(1 - self.dropout))
                self._h_dropout_mask = torch.bernoulli(
                    new_tensor(4, batch_size, self.hidden_size).fill_(1 - self.dropout))
            else:
                mask = constant_mask(1 - self.dropout)
                self._input_dropout_mask = mask
                self._h_dropout_mask = mask
        else:
            mask = constant_mask(1.)
            self._input_dropout_mask = mask
            self._h_dropout_mask = mask

    @classmethod
    def _hook_remove_dropout_masks_from_state_dict(cls, instance, state_dict, prefix, local_metadata):
        del state_dict[prefix + '_input_dropout_mask']
        del state_dict[prefix + '_h_dropout_mask']

    def _hook_add_dropout_masks_to_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
        state_dict[prefix + '_input_dropout_mask'] = self._input_dropout_mask
        state_dict[prefix + '_h_dropout_mask'] = self._h_dropout_mask

    @torch.jit.script_method
    def forward(
            self,
            input: torch.Tensor,
            hidden_state: Tuple[torch.Tensor, torch.Tensor]):
        h_tm1, c_tm1 = hidden_state
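
        # For reference, these gate computations implement the standard LSTM
        # update, with a per-gate dropout mask applied to x_t and h_{t-1}
        # (b_* below is the sum of the corresponding bias_ih and bias_hh slices):
        #   i_t = sigmoid(W_i x_t + U_i h_{t-1} + b_i)
        #   f_t = sigmoid(W_f x_t + U_f h_{t-1} + b_f)
        #   c_t = f_t * c_{t-1} + i_t * tanh(W_c x_t + U_c h_{t-1} + b_c)
        #   o_t = sigmoid(W_o x_t + U_o h_{t-1} + b_o)
        #   h_t = o_t * tanh(c_t)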
        xi_t = torch.nn.functional.linear(input * self._input_dropout_mask[0, :input.shape[0]], self.W_i)
        xf_t = torch.nn.functional.linear(input * self._input_dropout_mask[1, :input.shape[0]], self.W_f)
        xc_t = torch.nn.functional.linear(input * self._input_dropout_mask[2, :input.shape[0]], self.W_c)
        xo_t = torch.nn.functional.linear(input * self._input_dropout_mask[3, :input.shape[0]], self.W_o)

        hi_t = torch.nn.functional.linear(h_tm1 * self._h_dropout_mask[0, :input.shape[0]], self.U_i)
        hf_t = torch.nn.functional.linear(h_tm1 * self._h_dropout_mask[1, :input.shape[0]], self.U_f)
        hc_t = torch.nn.functional.linear(h_tm1 * self._h_dropout_mask[2, :input.shape[0]], self.U_c)
        ho_t = torch.nn.functional.linear(h_tm1 * self._h_dropout_mask[3, :input.shape[0]], self.U_o)

        i_t = torch.sigmoid(xi_t + self.bias_ih[:self.hidden_size] + hi_t + self.bias_hh[:self.hidden_size])
        f_t = torch.sigmoid(xf_t + self.bias_ih[self.hidden_size:2 * self.hidden_size] + hf_t + self.bias_hh[self.hidden_size:2 * self.hidden_size])
        c_t = f_t * c_tm1 + i_t * torch.tanh(xc_t + self.bias_ih[2 * self.hidden_size:3 * self.hidden_size] + hc_t + self.bias_hh[2 * self.hidden_size:3 * self.hidden_size])
        o_t = torch.sigmoid(xo_t + self.bias_ih[3 * self.hidden_size:4 * self.hidden_size] + ho_t + self.bias_hh[3 * self.hidden_size:4 * self.hidden_size])
        h_t = o_t * torch.tanh(c_t)

        return h_t, c_t


class LSTM(torch.jit.ScriptModule):
    def __init__(self, input_size, hidden_size, bidirectional=False, dropout=0., cell_factory=RecurrentDropoutLSTMCell):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.cell_factory = cell_factory

        num_directions = 2 if bidirectional else 1
        self.lstm_cells = []
        for direction in range(num_directions):
            cell = cell_factory(input_size, hidden_size, dropout=dropout)
            self.lstm_cells.append(cell)
            suffix = '_reverse' if direction == 1 else ''
            cell_name = 'cell{}'.format(suffix)
            self.add_module(cell_name, cell)

    def forward(self, input, hidden_state=None):
        # TODO: Support multiple layers
        # TODO: Support batch_first
        is_packed = isinstance(input, torch.nn.utils.rnn.PackedSequence)
        if not is_packed:
            raise NotImplementedError

        max_batch_size = input.batch_sizes[0]
        for cell in self.lstm_cells:
            cell.set_dropout_masks(max_batch_size)

        if hidden_state is None:
            num_directions = 2 if self.bidirectional else 1
            hx = input.data.new_zeros(num_directions,
                                      max_batch_size, self.hidden_size,
                                      requires_grad=False)
            hidden_state = (hx, hx)

        forward_hidden_state = tuple(v[0] for v in hidden_state)
        if self.bidirectional:
            reverse_hidden_state = tuple(v[1] for v in hidden_state)
            forward_output, (forward_h, forward_c) = self._forward_packed(input.data, input.batch_sizes, forward_hidden_state)
            reverse_output, (reverse_h, reverse_c) = self._reverse_packed(input.data, input.batch_sizes, reverse_hidden_state)
            return (torch.nn.utils.rnn.PackedSequence(
                        torch.cat((forward_output, reverse_output), dim=-1),
                        input.batch_sizes,
                        input.sorted_indices,
                        input.unsorted_indices),
                    (torch.stack((forward_h, reverse_h), dim=0),
                     torch.stack((forward_c, reverse_c), dim=0)))

        output, next_hidden = self._forward_packed(input.data, input.batch_sizes, forward_hidden_state)
        return (torch.nn.utils.rnn.PackedSequence(
                    output,
                    input.batch_sizes,
                    input.sorted_indices,
                    input.unsorted_indices),
                next_hidden)

    @torch.jit.script_method
    def _forward_packed(self, input: torch.Tensor, batch_sizes: torch.Tensor, hidden_state: Tuple[torch.Tensor, torch.Tensor]):
        # Derived from
        # https://github.com/pytorch/pytorch/blob/6a4ca9abec1c18184635881c08628737c8ed2497/aten/src/ATen/native/RNN.cpp#L589
        step_outputs = []
        hs = []
        cs = []
        input_offset = torch.zeros((), dtype=torch.int64)  # scalar zero
        num_steps = batch_sizes.shape[0]
        last_batch_size = batch_sizes[0]

        # batch_sizes is a sequence of decreasing lengths, which are offsets
        # into a 1D list of inputs. At every step we slice out batch_size elements,
        # and possibly account for the decrease in the batch size since the last
        # step, which requires us to slice the hidden state (since some sequences
        # are completed now). The sliced parts are also saved, because we will
        # need to return a tensor of final hidden states.
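        #
        # For example, three sequences of lengths 3, 2, and 1 pack into
        # batch_sizes == [3, 2, 1]: step 0 consumes 3 rows of `input`,
        # step 1 consumes 2 rows (and the finished sequence's hidden state
        # is sliced off and saved), and step 2 consumes 1 row.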
        h, c = hidden_state
        for i in range(num_steps):
            batch_size = batch_sizes[i]
            step_input = input.narrow(0, input_offset, batch_size)
            input_offset += batch_size

            dec = last_batch_size - batch_size
            if dec > 0:
                hs.append(h[last_batch_size - dec:last_batch_size])
                cs.append(c[last_batch_size - dec:last_batch_size])
                h = h[:last_batch_size - dec]
                c = c[:last_batch_size - dec]
            last_batch_size = batch_size

            h, c = self.cell(step_input, (h, c))
            step_outputs.append(h)

        hs.append(h)
        cs.append(c)
        hs.reverse()
        cs.reverse()

        concat_h = torch.cat(hs)
        concat_c = torch.cat(cs)
        return (torch.cat(step_outputs, dim=0), (concat_h, concat_c))

    @torch.jit.script_method
    def _reverse_packed(self, input: torch.Tensor, batch_sizes: torch.Tensor, hidden_state: Tuple[torch.Tensor, torch.Tensor]):
        # Derived from
        # https://github.com/pytorch/pytorch/blob/6a4ca9abec1c18184635881c08628737c8ed2497/aten/src/ATen/native/RNN.cpp#L650
        step_outputs = []
        input_offset = torch.zeros((), dtype=torch.int64)  # scalar zero
        num_steps = batch_sizes.shape[0]
        last_batch_size = batch_sizes[-1]

        # The situation here is similar to that in _forward_packed, except we
        # start out with the smallest batch size (and a small set of hidden
        # states we actually use), and progressively expand the hidden states
        # as we move backwards over the 1D list of inputs.
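        #
        # Continuing the batch_sizes == [3, 2, 1] example: the reverse pass
        # starts at the last step with a single active sequence and grows the
        # hidden state back to 3 rows by the time it reaches step 0.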
        h, c = hidden_state
        input_h, input_c = hidden_state
        h = h[:batch_sizes[-1]]
        c = c[:batch_sizes[-1]]

        # `for i in range(num_steps - 1, -1, -1)` is not supported in
        # TorchScript 1.1, so we use a while loop as a workaround:
        i = num_steps - 1
        while i > -1:
            batch_size = batch_sizes[i]
            inc = batch_size - last_batch_size
            if inc > 0:
                h = torch.cat((h, input_h[last_batch_size:batch_size]))
                c = torch.cat((c, input_c[last_batch_size:batch_size]))

            step_input = input.narrow(0, input_offset - batch_size, batch_size)
            input_offset -= batch_size
            last_batch_size = batch_size

            h, c = self.cell_reverse(step_input, (h, c))
            step_outputs.append(h)
            i -= 1

        step_outputs.reverse()
        return (torch.cat(step_outputs, dim=0), (h, c))
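

# A minimal usage sketch, assuming this file is run as a script: it builds a
# small bidirectional LSTM, packs three variable-length sequences (the sizes
# and lengths below are arbitrary), and prints the resulting shapes.
if __name__ == '__main__':
    torch.manual_seed(0)
    input_size, hidden_size = 8, 16
    lstm = LSTM(input_size, hidden_size, bidirectional=True, dropout=0.3)
    lstm.train()

    # Three sequences of lengths 5, 3, and 2, already sorted by decreasing
    # length as pack_padded_sequence expects by default.
    lengths = [5, 3, 2]
    padded = torch.randn(len(lengths), max(lengths), input_size)
    packed = torch.nn.utils.rnn.pack_padded_sequence(padded, lengths, batch_first=True)

    packed_output, (h_n, c_n) = lstm(packed)
    output, _ = torch.nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
    print(output.shape)          # (3, 5, 2 * hidden_size): forward and reverse concatenated
    print(h_n.shape, c_n.shape)  # (2, 3, hidden_size) each: one slice per direction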