Spaces:

rbgo
/

Eng-Ass-Former

Runtime error

App Files Files Community

Eng-Ass-Former / inference.py

rbgo

os removed

bc2d451 over 3 years ago

raw

history blame contribute delete

3.57 kB

	import tensorflow as tf

	def create_padding_mask(seq):
	    """Build a float mask that flags the zero entries of ``seq``.

	    Every position where ``seq`` equals 0 becomes 1.0 and every other
	    position becomes 0.0. Two singleton axes are then inserted between the
	    first and last axes so the mask can be added to attention logits.

	    Args:
	        seq: Tensor of token ids. Presumably (batch_size, seq_len) with 0
	            used as the padding id -- confirm against the caller.

	    Returns:
	        A ``tf.float32`` tensor of shape (batch_size, 1, 1, seq_len).
	    """
	    is_padding = tf.math.equal(seq, 0)
	    pad_flags = tf.cast(is_padding, tf.float32)
	    # Insert the two broadcast axes: (batch_size, 1, 1, seq_len).
	    return pad_flags[:, tf.newaxis, tf.newaxis, :]

	def create_look_ahead_mask(size):
	    """Return a (size, size) mask with ones strictly above the diagonal.

	    Entry (i, j) is 1 when j > i and 0 otherwise, i.e. a 1 marks a position
	    that lies ahead of the current one.

	    Args:
	        size: Side length of the square mask.

	    Returns:
	        A tensor of shape (size, size), i.e. (seq_len, seq_len).
	    """
	    all_ones = tf.ones((size, size))
	    # band_part(x, -1, 0) keeps the diagonal and everything below it.
	    lower_triangle = tf.linalg.band_part(all_ones, -1, 0)
	    return 1 - lower_triangle

	def create_masks(inp, tar):
	    """Create the three masks passed to the transformer for one step.

	    Args:
	        inp: Encoder input token ids.
	        tar: Decoder input token ids; its second axis gives the size of
	            the look-ahead mask.

	    Returns:
	        A tuple ``(enc_padding_mask, combined_mask, dec_padding_mask)``:
	        the padding mask of ``inp`` for the encoder, the element-wise
	        maximum of the padding mask of ``tar`` and the look-ahead mask,
	        and the padding mask of ``inp`` again for the decoder's second
	        attention block (where it masks the encoder outputs).
	    """
	    # Mask for the decoder's first attention block: hides both the padded
	    # positions of the target and the tokens that come later in it.
	    future_mask = create_look_ahead_mask(tf.shape(tar)[1])
	    target_pad_mask = create_padding_mask(tar)
	    combined_mask = tf.maximum(target_pad_mask, future_mask)

	    # Both of these are derived from the encoder input.
	    enc_padding_mask = create_padding_mask(inp)
	    dec_padding_mask = create_padding_mask(inp)

	    return enc_padding_mask, combined_mask, dec_padding_mask

	def translate_main(transformer, inp_sentence, tokenizer_ass, tokenizer_en, MAX_LENGTH):
	    """Translate ``inp_sentence`` by greedy decoding with ``transformer``.

	    The sentence is encoded with ``tokenizer_ass`` and wrapped in start/end
	    ids (``vocab_size`` and ``vocab_size + 1``). The decoder is seeded with
	    the target start id (``tokenizer_en.vocab_size``) and extended one
	    arg-max token at a time until the target end id is predicted or
	    ``MAX_LENGTH`` steps have run.

	    Args:
	        transformer: Callable invoked as ``transformer(encoder_input,
	            output, False, enc_padding_mask, combined_mask,
	            dec_padding_mask)`` and returning
	            ``(predictions, attention_weights)``.
	        inp_sentence: Source text handed to ``tokenizer_ass.encode``.
	        tokenizer_ass: Source-side tokenizer exposing ``vocab_size`` and
	            ``encode``.
	        tokenizer_en: Target-side tokenizer exposing ``vocab_size`` and
	            ``decode``.
	        MAX_LENGTH: Maximum number of decoding steps. With a value of 0 or
	            less no step runs and only the start token is decoded.

	    Returns:
	        Whatever ``tokenizer_en.decode`` returns for the generated ids that
	        are below ``tokenizer_en.vocab_size`` (start/end ids are dropped).
	    """
	    def evaluate(sentence):
	        """Run the greedy loop; return (token ids, last attention weights)."""
	        # Wrap the encoded source sentence in its start and end ids.
	        src_start = [tokenizer_ass.vocab_size]
	        src_end = [tokenizer_ass.vocab_size + 1]
	        src_ids = src_start + tokenizer_ass.encode(sentence) + src_end
	        encoder_input = tf.expand_dims(src_ids, 0)

	        # The decoder starts from the target-language start id.
	        output = tf.expand_dims([tokenizer_en.vocab_size], 0)
	        tgt_end_id = tokenizer_en.vocab_size + 1

	        # Bound up front so the final return cannot raise UnboundLocalError
	        # when MAX_LENGTH <= 0 and the loop body never executes.
	        attention_weights = None

	        for _ in range(MAX_LENGTH):
	            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
	                encoder_input, output)

	            # predictions.shape == (batch_size, seq_len, vocab_size)
	            predictions, attention_weights = transformer(encoder_input,
	                                                         output,
	                                                         False,
	                                                         enc_padding_mask,
	                                                         combined_mask,
	                                                         dec_padding_mask)

	            # Keep only the last position: (batch_size, 1, vocab_size).
	            predictions = predictions[:, -1:, :]
	            predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

	            # Stop as soon as the end id is predicted; it is not appended.
	            if predicted_id == tgt_end_id:
	                return tf.squeeze(output, axis=0), attention_weights

	            # Feed the prediction back in as the next decoder input.
	            output = tf.concat([output, predicted_id], axis=-1)

	        return tf.squeeze(output, axis=0), attention_weights

	    def translate(sentence):
	        """Decode the ids produced by ``evaluate`` into target text."""
	        result, _ = evaluate(sentence)
	        vocab_size = tokenizer_en.vocab_size
	        # Ids >= vocab_size are the start/end markers, not real tokens.
	        return tokenizer_en.decode([i for i in result if i < vocab_size])

	    return translate(inp_sentence)