# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""XLNet models.""" | |
# pylint: disable=g-classes-have-attributes | |
from typing import Any, Mapping, Optional, Union | |
import tensorflow as tf, tf_keras | |
from official.nlp.modeling import layers | |
from official.nlp.modeling import networks | |


class XLNetMaskedLM(tf_keras.layers.Layer):
  """XLNet pretraining head.

  Args:
    vocab_size: The size of the output vocabulary.
    hidden_size: The output size of the dense transform layer.
    initializer: The initializer for the dense transform layer. Defaults to a
      Glorot uniform initializer.
    activation: The activation for the dense transform layer. Defaults to
      'gelu'.
  """

  def __init__(self,
               vocab_size: int,
               hidden_size: int,
               initializer: str = 'glorot_uniform',
               activation: str = 'gelu',
               name=None,
               **kwargs):
    super().__init__(name=name, **kwargs)
    self._vocab_size = vocab_size
    self._hidden_size = hidden_size
    self._initializer = initializer
    self._activation = activation

  def build(self, input_shape):
    self.dense = tf_keras.layers.Dense(
        units=self._hidden_size,
        activation=self._activation,
        kernel_initializer=self._initializer,
        name='transform/dense')
    self.layer_norm = tf_keras.layers.LayerNormalization(
        axis=-1, epsilon=1e-12, name='transform/LayerNorm')
    self.bias = self.add_weight(
        'output_bias/bias',
        shape=(self._vocab_size,),
        initializer='zeros',
        trainable=True)
    super().build(input_shape)

  def call(self,
           sequence_data: tf.Tensor,
           embedding_table: tf.Tensor):
    lm_data = self.dense(sequence_data)
    lm_data = self.layer_norm(lm_data)
    # Tied output projection: reuse the input embedding table as the
    # vocabulary projection matrix.
    lm_data = tf.matmul(lm_data, embedding_table, transpose_b=True)
    logits = tf.nn.bias_add(lm_data, self.bias)
    return logits

  def get_config(self) -> Mapping[str, Any]:
    config = {
        'vocab_size': self._vocab_size,
        'hidden_size': self._hidden_size,
        'initializer': self._initializer,
        'activation': self._activation,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))
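

# A minimal usage sketch for `XLNetMaskedLM`, kept as a comment so the module
# stays import-safe. The shapes and toy tensors below are illustrative
# assumptions, not values taken from this file:
#
#   head = XLNetMaskedLM(vocab_size=32000, hidden_size=768)
#   sequence_data = tf.random.normal([2, 85, 768])    # [batch, num_targets, hidden]
#   embedding_table = tf.random.normal([32000, 768])  # [vocab_size, hidden]
#   logits = head(sequence_data=sequence_data, embedding_table=embedding_table)
#   # logits has shape [2, 85, 32000]; the embedding table is reused (tied) as
#   # the output projection via the `transpose_b=True` matmul above.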


class XLNetPretrainer(tf_keras.Model):
  """XLNet-based pretrainer.

  This is an implementation of the network structure surrounding a
  Transformer-XL encoder as described in "XLNet: Generalized Autoregressive
  Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237).

  Args:
    network: An XLNet/Transformer-XL based network. This network should output
      a sequence output and a list of `state` tensors.
    mlm_activation: The activation (if any) to use in the Masked LM network. If
      None, no activation will be used.
    mlm_initializer: The initializer (if any) to use in the masked LM. Defaults
      to a Glorot uniform initializer.
  """

  def __init__(
      self,
      network: Union[tf_keras.layers.Layer, tf_keras.Model],
      mlm_activation=None,
      mlm_initializer='glorot_uniform',
      name: Optional[str] = None,
      **kwargs):
    super().__init__(name=name, **kwargs)
    self._config = {
        'network': network,
        'mlm_activation': mlm_activation,
        'mlm_initializer': mlm_initializer,
    }
    self._network = network
    self._hidden_size = network.get_config()['hidden_size']
    self._vocab_size = network.get_config()['vocab_size']
    self._activation = mlm_activation
    self._initializer = mlm_initializer
    self._masked_lm = XLNetMaskedLM(
        vocab_size=self._vocab_size,
        hidden_size=self._hidden_size,
        initializer=self._initializer,
        activation=self._activation)

  def call(self, inputs: Mapping[str, Any]):  # pytype: disable=signature-mismatch  # overriding-parameter-count-checks
    input_word_ids = inputs['input_word_ids']
    input_type_ids = inputs['input_type_ids']
    masked_tokens = inputs['masked_tokens']
    permutation_mask = inputs['permutation_mask']
    target_mapping = inputs['target_mapping']
    state = inputs.get('state', None)

    attention_output, state = self._network(
        input_ids=input_word_ids,
        segment_ids=input_type_ids,
        input_mask=None,
        state=state,
        permutation_mask=permutation_mask,
        target_mapping=target_mapping,
        masked_tokens=masked_tokens)

    embedding_table = self._network.get_embedding_lookup_table()
    mlm_outputs = self._masked_lm(
        sequence_data=attention_output,
        embedding_table=embedding_table)
    return mlm_outputs, state

  def get_config(self) -> Mapping[str, Any]:
    return self._config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def checkpoint_items(self):
    return dict(encoder=self._network)
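

# A hedged end-to-end sketch for `XLNetPretrainer`. It assumes the encoder is
# `networks.XLNetBase` (its remaining constructor arguments are elided here)
# and that the input tensors come from a pretraining data pipeline:
#
#   encoder = networks.XLNetBase(vocab_size=32000, hidden_size=768, ...)
#   pretrainer = XLNetPretrainer(network=encoder)
#   mlm_logits, state = pretrainer({
#       'input_word_ids': input_word_ids,      # [batch, seq_len]
#       'input_type_ids': input_type_ids,      # [batch, seq_len]
#       'masked_tokens': masked_tokens,        # [batch, seq_len]
#       'permutation_mask': permutation_mask,  # [batch, seq_len, seq_len]
#       'target_mapping': target_mapping,      # [batch, num_targets, seq_len]
#   })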


class XLNetClassifier(tf_keras.Model):
  """Classifier model based on XLNet.

  This is an implementation of the network structure surrounding a
  Transformer-XL encoder as described in "XLNet: Generalized Autoregressive
  Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237).

  Note: This model does not utilize the memory mechanism used in the original
  XLNet Classifier.

  Args:
    network: An XLNet/Transformer-XL based network. This network should output
      a sequence output and a list of `state` tensors.
    num_classes: Number of classes to predict from the classification network.
    initializer: The initializer (if any) to use in the classification
      networks. Defaults to a RandomNormal initializer.
    summary_type: Method used to summarize a sequence into a compact vector.
      Either 'last' (use the final token) or 'first' (use the first token).
    dropout_rate: The dropout probability of the cls head.
    head_name: Name of the classification head.
  """

  def __init__(
      self,
      network: Union[tf_keras.layers.Layer, tf_keras.Model],
      num_classes: int,
      initializer: tf_keras.initializers.Initializer = 'random_normal',
      summary_type: str = 'last',
      dropout_rate: float = 0.1,
      head_name: str = 'sentence_prediction',  # pytype: disable=annotation-type-mismatch  # typed-keras
      **kwargs):
    super().__init__(**kwargs)
    self._network = network
    self._initializer = initializer
    self._summary_type = summary_type
    self._num_classes = num_classes
    self._config = {
        'network': network,
        'initializer': initializer,
        'num_classes': num_classes,
        'summary_type': summary_type,
        'dropout_rate': dropout_rate,
        'head_name': head_name,
    }

    if summary_type == 'last':
      cls_token_idx = -1
    elif summary_type == 'first':
      cls_token_idx = 0
    else:
      raise ValueError('Invalid summary type provided: %s.' % summary_type)

    self.classifier = layers.ClassificationHead(
        inner_dim=network.get_config()['hidden_size'],
        num_classes=num_classes,
        initializer=initializer,
        dropout_rate=dropout_rate,
        cls_token_idx=cls_token_idx,
        name=head_name)

  def call(self, inputs: Mapping[str, Any]):  # pytype: disable=signature-mismatch  # overriding-parameter-count-checks
    input_ids = inputs['input_word_ids']
    segment_ids = inputs['input_type_ids']
    input_mask = tf.cast(inputs['input_mask'], tf.float32)
    state = inputs.get('mems', None)

    attention_output, _ = self._network(
        input_ids=input_ids,
        segment_ids=segment_ids,
        input_mask=input_mask,
        state=state)

    logits = self.classifier(attention_output)
    return logits

  def get_config(self):
    return self._config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def checkpoint_items(self):
    items = dict(encoder=self._network)
    if hasattr(self.classifier, 'checkpoint_items'):
      for key, item in self.classifier.checkpoint_items.items():
        items['.'.join([self.classifier.name, key])] = item
    return items
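

# A minimal `XLNetClassifier` usage sketch (comment-only; `encoder` and all
# input tensors are assumed to exist, as in the pretrainer sketch above):
#
#   classifier = XLNetClassifier(network=encoder, num_classes=2)
#   logits = classifier({
#       'input_word_ids': input_word_ids,  # [batch, seq_len]
#       'input_type_ids': input_type_ids,  # [batch, seq_len]
#       'input_mask': input_mask,          # [batch, seq_len]
#   })
#   # logits: [batch, num_classes]. With summary_type='last', the head reads
#   # the final token's representation (cls_token_idx=-1).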


class XLNetSpanLabeler(tf_keras.Model):
  """Span labeler model based on XLNet.

  This is an implementation of the network structure surrounding a
  Transformer-XL encoder as described in "XLNet: Generalized Autoregressive
  Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237).

  Args:
    network: A transformer network. This network should output a sequence
      output and a classification output. Furthermore, it should expose its
      embedding table via a "get_embedding_table" method.
    start_n_top: Beam size for span start.
    end_n_top: Beam size for span end.
    dropout_rate: The dropout rate for the span labeling layer.
    span_labeling_activation: The activation for the span labeling head.
    initializer: The initializer (if any) to use in the span labeling network.
      Defaults to a Glorot uniform initializer.
  """

  def __init__(
      self,
      network: Union[tf_keras.layers.Layer, tf_keras.Model],
      start_n_top: int = 5,
      end_n_top: int = 5,
      dropout_rate: float = 0.1,
      span_labeling_activation: str = 'tanh',
      initializer: tf_keras.initializers.Initializer = 'glorot_uniform',  # pytype: disable=annotation-type-mismatch  # typed-keras
      **kwargs):
    super().__init__(**kwargs)
    self._config = {
        'network': network,
        'start_n_top': start_n_top,
        'end_n_top': end_n_top,
        'dropout_rate': dropout_rate,
        'span_labeling_activation': span_labeling_activation,
        'initializer': initializer,
    }

    network_config = network.get_config()
    try:
      input_width = network_config['inner_size']
      self._xlnet_base = True
    except KeyError:
      # BertEncoder uses 'intermediate_size' due to legacy naming.
      input_width = network_config['intermediate_size']
      self._xlnet_base = False

    self._network = network
    self._initializer = initializer
    self._start_n_top = start_n_top
    self._end_n_top = end_n_top
    self._dropout_rate = dropout_rate
    self._activation = span_labeling_activation

    self.span_labeling = networks.XLNetSpanLabeling(
        input_width=input_width,
        start_n_top=self._start_n_top,
        end_n_top=self._end_n_top,
        activation=self._activation,
        dropout_rate=self._dropout_rate,
        initializer=self._initializer)

  def call(self, inputs: Mapping[str, Any]):  # pytype: disable=signature-mismatch  # overriding-parameter-count-checks
    input_word_ids = inputs['input_word_ids']
    input_type_ids = inputs['input_type_ids']
    input_mask = inputs['input_mask']
    class_index = inputs['class_index']
    paragraph_mask = inputs['paragraph_mask']
    start_positions = inputs.get('start_positions', None)

    if self._xlnet_base:
      attention_output, _ = self._network(
          input_ids=input_word_ids,
          segment_ids=input_type_ids,
          input_mask=input_mask)
    else:
      # Non-XLNet encoders (e.g. BertEncoder) take a dict of inputs and
      # return a dict of outputs.
      network_output_dict = self._network(dict(
          input_word_ids=input_word_ids,
          input_type_ids=input_type_ids,
          input_mask=input_mask))
      attention_output = network_output_dict['sequence_output']

    outputs = self.span_labeling(
        sequence_data=attention_output,
        class_index=class_index,
        paragraph_mask=paragraph_mask,
        start_positions=start_positions)
    return outputs

  @property
  def checkpoint_items(self):
    return dict(encoder=self._network)

  def get_config(self):
    return self._config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
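

# A minimal `XLNetSpanLabeler` usage sketch (comment-only; input shapes are
# assumptions inferred from the keys read in `call` above):
#
#   span_labeler = XLNetSpanLabeler(network=encoder, start_n_top=5, end_n_top=5)
#   outputs = span_labeler({
#       'input_word_ids': input_word_ids,  # [batch, seq_len]
#       'input_type_ids': input_type_ids,  # [batch, seq_len]
#       'input_mask': input_mask,          # [batch, seq_len]
#       'class_index': class_index,        # [batch]
#       'paragraph_mask': paragraph_mask,  # [batch, seq_len]
#       # 'start_positions' ([batch]) is optional; supply it during training.
#   })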