import numpy as np
import tensorflow as tf
import keras
from keras.models import Model
import warnings
warnings.filterwarnings('ignore')
class Encoder(Model):
    # Projects the pre-extracted CNN feature map into the embedding space.
    def __init__(self, embed_dim):
        super(Encoder, self).__init__()
        self.dense = tf.keras.layers.Dense(embed_dim)

    def call(self, features):
        features = self.dense(features)
        features = tf.keras.activations.relu(features)
        return features
class Attention_model(Model):
    # Bahdanau-style additive attention over the encoder's image features.
    def __init__(self, units):
        super(Attention_model, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        self.units = units

    def call(self, features, hidden):
        # features: (batch, 64, embed_dim); hidden: (batch, units)
        hidden_with_time_axis = hidden[:, tf.newaxis]
        score = tf.keras.activations.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        attention_weights = tf.keras.activations.softmax(self.V(score), axis=1)
        # Weighted sum of the features gives the context vector for this step.
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights
class Decoder(Model):
    # GRU decoder that attends over the image features at every decoding step.
    def __init__(self, embed_dim, units, vocab_size):
        super(Decoder, self).__init__()
        self.units = units
        self.attention = Attention_model(self.units)
        self.embed = tf.keras.layers.Embedding(vocab_size, embed_dim)
        self.gru = tf.keras.layers.GRU(self.units, return_sequences=True, return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.d1 = tf.keras.layers.Dense(self.units)
        self.d2 = tf.keras.layers.Dense(vocab_size)

    def call(self, x, features, hidden):
        context_vector, attention_weights = self.attention(features, hidden)
        embed = self.embed(x)
        # Prepend the context vector to the embedded input token.
        embed = tf.concat([tf.expand_dims(context_vector, 1), embed], axis=-1)
        output, state = self.gru(embed)
        output = self.d1(output)
        output = tf.reshape(output, (-1, output.shape[2]))
        output = self.d2(output)
        return output, state, attention_weights

    def init_state(self, batch_size):
        return tf.zeros((batch_size, self.units))

    def reset_state(self, batch_size):
        return tf.zeros((batch_size, self.units))
# Loading the tokenizer
with open("efb-requirements/tokenizer.json", 'r', encoding='utf-8') as f:
    loaded_tokenizer_json = f.read()
tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(loaded_tokenizer_json)
def load_and_process_image(image, target_size=(299, 299)):
    # Resize the raw image and apply InceptionV3 preprocessing (scales pixels to [-1, 1]).
    img = tf.convert_to_tensor(image)
    img = tf.cast(img, tf.uint8)
    img = tf.image.resize(img, target_size)
    img = tf.keras.applications.inception_v3.preprocess_input(img)
    return img
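# Note (assumption, not part of the original script): load_and_process_image expects an
# H x W x 3 RGB array, e.g. np.asarray(pil_image); the result is a (299, 299, 3) float
# tensor scaled to [-1, 1], ready for the InceptionV3 feature extractor loaded below.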
image_features_extract_model = keras.models.load_model("efb-requirements/inception_v3.h5")
embedding_dim = 256
units = 512
vocab_size = 5001
encoder = Encoder(embedding_dim)
decoder = Decoder(embedding_dim, units, vocab_size)
# Running dummy inputs through the models so their weights are built before loading them
dummy_img_input = tf.ones((32, 64, 2048))
features = encoder(dummy_img_input)
hidden = decoder.init_state(32)
dec_input = tf.expand_dims([tokenizer.word_index['<start>']] * 32, 1)
dec = decoder(dec_input, features, hidden)
# Loading the saved model weights
encoder.load_weights("efb-requirements/encoder_50epoch_weights.h5")
decoder.load_weights("efb-requirements/decoder_50epoch_weights.h5")
def evaluate(image):
    max_length = 39
    attention_plot = np.zeros((max_length, 64))
    hidden = decoder.reset_state(batch_size=1)
    # Processing the input image into the desired format before extracting features
    temp_input = tf.expand_dims(load_and_process_image(image), 0)
    img_tensor_val = image_features_extract_model(temp_input)
    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
    cnn_features = encoder(img_tensor_val)
    decoder_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []
    predictions = ''
    for i in range(max_length):
        predictions, hidden, attention_weights = decoder(decoder_input, cnn_features, hidden)
        attention_plot[i] = tf.reshape(attention_weights, (-1,)).numpy()
        # Greedy decoding: pick the highest-probability token at each step.
        predicted_id = tf.argmax(predictions[0]).numpy()
        result.append(tokenizer.index_word[predicted_id])
        if tokenizer.index_word[predicted_id] == '<end>':
            # return result, attention_plot, predictions
            return result
        decoder_input = tf.expand_dims([predicted_id], 0)
    attention_plot = attention_plot[:len(result), :]
    # return result, attention_plot, predictions
    return result
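# Example usage (sketch, not part of the original script): evaluate() accepts an RGB
# image array and returns the predicted caption tokens. The random dummy image below
# is an assumption used purely for illustration.
if __name__ == "__main__":
    dummy_image = np.random.randint(0, 255, size=(299, 299, 3), dtype=np.uint8)
    caption_tokens = evaluate(dummy_image)
    print(' '.join(caption_tokens))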