"""Image-caption inference: additive-attention encoder/decoder over InceptionV3 features.

Loads a saved tokenizer, an InceptionV3 feature extractor, and trained
encoder/decoder weights, then exposes `evaluate(image)` which greedily
decodes a caption for a single image.
"""

import warnings

import numpy as np
import tensorflow as tf
import keras
from keras.models import Model

warnings.filterwarnings('ignore')


class Encoder(Model):
    """Projects pre-extracted CNN features into the caption embedding space."""

    def __init__(self, embed_dim):
        super(Encoder, self).__init__()
        # Single fully-connected projection; ReLU is applied in call().
        self.dense = tf.keras.layers.Dense(embed_dim)

    def call(self, features):
        """Return ReLU(Dense(features)); shape (..., embed_dim)."""
        features = self.dense(features)
        features = tf.keras.activations.relu(features)
        return features


class Attention_model(Model):
    """Bahdanau-style additive attention over the encoder's feature locations."""

    def __init__(self, units):
        super(Attention_model, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        self.units = units

    def call(self, features, hidden):
        """Return (context_vector, attention_weights) for one decode step.

        features: encoder output, (batch, locations, embed_dim).
        hidden:   decoder GRU state, (batch, units).
        """
        # (batch, units) -> (batch, 1, units) so it broadcasts across the
        # spatial-locations axis of `features`.
        hidden_with_time_axis = hidden[:, tf.newaxis]
        score = tf.keras.activations.tanh(
            self.W1(features) + self.W2(hidden_with_time_axis))
        # Softmax over axis=1: normalize across feature locations.
        attention_weights = tf.keras.activations.softmax(self.V(score), axis=1)
        # Weighted sum of features -> (batch, embed_dim).
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights


class Decoder(Model):
    """GRU decoder that attends over the image features at every step."""

    def __init__(self, embed_dim, units, vocab_size):
        super(Decoder, self).__init__()
        self.units = units
        self.attention = Attention_model(self.units)
        self.embed = tf.keras.layers.Embedding(vocab_size, embed_dim)
        self.gru = tf.keras.layers.GRU(
            self.units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform')
        self.d1 = tf.keras.layers.Dense(self.units)
        self.d2 = tf.keras.layers.Dense(vocab_size)

    def call(self, x, features, hidden):
        """One decode step.

        x:        previous token ids, (batch, 1).
        features: encoder output, (batch, locations, embed_dim).
        hidden:   previous GRU state, (batch, units).
        Returns (logits over vocab, new state, attention weights).
        """
        context_vector, attention_weights = self.attention(features, hidden)
        embed = self.embed(x)
        # Prepend the attention context to the token embedding along the
        # feature axis: (batch, 1, 2 * embed_dim).
        embed = tf.concat([tf.expand_dims(context_vector, 1), embed], axis=-1)
        # NOTE(review): `hidden` is NOT passed as initial_state; state flows
        # between steps only via the returned `state` and the attention input.
        # This matches the standard TF captioning tutorial — confirm intended.
        output, state = self.gru(embed)
        output = self.d1(output)
        # Collapse (batch, 1, units) -> (batch, units) before vocab projection.
        output = tf.reshape(output, (-1, output.shape[2]))
        output = self.d2(output)
        return output, state, attention_weights

    def init_state(self, batch_size):
        """Zero GRU state; kept alongside reset_state for caller compatibility."""
        return tf.zeros((batch_size, self.units))

    def reset_state(self, batch_size):
        """Zero GRU state, shape (batch_size, units)."""
        return tf.zeros((batch_size, self.units))


# Loading the tokenizer saved during training.
with open("efb-requirements/tokenizer.json", 'r', encoding='utf-8') as f:
    loaded_tokenizer_json = f.read()
tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(loaded_tokenizer_json)


def load_and_process_image(image, target_size=(299, 299)):
    """Resize `image` to InceptionV3's input size and apply its preprocessing."""
    img = tf.convert_to_tensor(image)
    img = tf.cast(img, tf.uint8)
    img = tf.image.resize(img, target_size)
    # Scales pixels to [-1, 1] as InceptionV3 expects.
    img = tf.keras.applications.inception_v3.preprocess_input(img)
    return img


image_features_extract_model = keras.models.load_model(
    "efb-requirements/inception_v3.h5")

embedding_dim = 256
units = 512
vocab_size = 5001

encoder = Encoder(embedding_dim)
decoder = Decoder(embedding_dim, units, vocab_size)

# Run dummy inputs through both models once so their variables are created
# before load_weights() is called (subclassed Keras models build lazily).
dummy_img_input = tf.ones((32, 64, 2048))
features = encoder(dummy_img_input)
hidden = decoder.init_state(32)
# '<start>' restored here: the special token was stripped from the original
# source as if it were markup — verify it matches the tokenizer's vocabulary.
dec_input = tf.expand_dims([tokenizer.word_index['<start>']] * 32, 1)
dec = decoder(dec_input, features, hidden)

# Loading the trained weights.
encoder.load_weights("efb-requirements/encoder_50epoch_weights.h5")
decoder.load_weights("efb-requirements/decoder_50epoch_weights.h5")


def evaluate(image):
    """Greedily decode a caption (list of words) for a single `image`.

    Stops early on the '<end>' token; otherwise decodes up to 39 tokens.
    """
    max_length = 39
    # One attention row per decoded token, over the 64 feature locations.
    attention_plot = np.zeros((max_length, 64))
    hidden = decoder.reset_state(batch_size=1)

    # Process the input image and extract CNN features.
    temp_input = tf.expand_dims(load_and_process_image(image), 0)
    img_tensor_val = image_features_extract_model(temp_input)
    # Flatten the spatial grid: (1, H, W, C) -> (1, H*W, C).
    img_tensor_val = tf.reshape(
        img_tensor_val,
        (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
    cnn_features = encoder(img_tensor_val)

    # '<start>' / '<end>' restored — these tokens were stripped from the
    # original source as markup; confirm against the tokenizer's word_index.
    decoder_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []
    for i in range(max_length):
        predictions, hidden, attention_weights = decoder(
            decoder_input, cnn_features, hidden)
        attention_plot[i] = tf.reshape(attention_weights, (-1,)).numpy()
        # Greedy decoding: most likely next token.
        predicted_id = tf.argmax(predictions[0]).numpy()
        result.append(tokenizer.index_word[predicted_id])
        if tokenizer.index_word[predicted_id] == '<end>':
            return result
        # Feed the predicted token back in as the next input.
        decoder_input = tf.expand_dims([predicted_id], 0)

    attention_plot = attention_plot[:len(result), :]
    return result