Spaces:
Sleeping
Sleeping
Commit
·
7291c1b
1
Parent(s):
72c24b3
Update app.py
Browse files
app.py
CHANGED
@@ -37,16 +37,64 @@ def load_image_model():
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
-
@st.cache_resource()
|
41 |
-
def load_decoder_model():
|
42 |
-
|
43 |
-
|
44 |
|
45 |
-
@st.cache_resource()
|
46 |
-
def load_encoder_model():
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
|
52 |
image = Image.open('./title.jpg')
|
@@ -89,11 +137,11 @@ def predict_caption(file):
|
|
89 |
resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
|
90 |
img = resize/255
|
91 |
|
92 |
-
encoder = load_encoder_model()
|
93 |
features = encoder(tf.expand_dims(img, axis=0))
|
94 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
95 |
result = []
|
96 |
-
decoder_pred_model = load_decoder_model()
|
97 |
for i in range(MAX_CAPTION_LEN):
|
98 |
predictions, gru_state = decoder_pred_model(
|
99 |
[dec_input, gru_state, features]
|
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
+
# @st.cache_resource()
|
41 |
+
# def load_decoder_model():
|
42 |
+
# decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
|
43 |
+
# return decoder_model
|
44 |
|
45 |
+
# @st.cache_resource()
|
46 |
+
# def load_encoder_model():
|
47 |
+
# encoder=tf.keras.models.load_model('./encoder_model.h5')
|
48 |
+
# return encoder
|
49 |
+
|
50 |
+
|
51 |
+
# **** DECODER ****
|
52 |
+
|
53 |
+
word_input = Input(shape=(MAX_CAPTION_LEN), name="words")
|
54 |
+
embed_x = Embedding(VOCAB_SIZE, ATTENTION_DIM)(word_input)
|
55 |
+
|
56 |
+
decoder_gru = GRU(
|
57 |
+
ATTENTION_DIM,
|
58 |
+
return_sequences=True,
|
59 |
+
return_state=True,
|
60 |
+
)
|
61 |
+
|
62 |
+
gru_output, gru_state = decoder_gru(embed_x)
|
63 |
+
|
64 |
+
decoder_attention = Attention()
|
65 |
+
context_vector = decoder_attention([gru_output, encoder_output])
|
66 |
+
addition = Add()([gru_output, context_vector])
|
67 |
+
|
68 |
+
layer_norm = LayerNormalization(axis=-1)
|
69 |
+
layer_norm_out = layer_norm(addition)
|
70 |
+
|
71 |
+
decoder_output_dense = Dense(VOCAB_SIZE)
|
72 |
+
|
73 |
+
# -----------
|
74 |
+
gru_state_input = Input(shape=(ATTENTION_DIM), name="gru_state_input")
|
75 |
+
|
76 |
+
# Reuse trained GRU, but update it so that it can receive states.
|
77 |
+
gru_output, gru_state = decoder_gru(embed_x, initial_state=gru_state_input)
|
78 |
+
|
79 |
+
# Reuse other layers as well
|
80 |
+
context_vector = decoder_attention([gru_output, encoder_output])
|
81 |
+
addition_output = Add()([gru_output, context_vector])
|
82 |
+
layer_norm_output = layer_norm(addition_output)
|
83 |
+
|
84 |
+
decoder_output = decoder_output_dense(layer_norm_output)
|
85 |
+
|
86 |
+
# Define prediction Model with state input and output
|
87 |
+
decoder_pred_model = tf.keras.Model(
|
88 |
+
inputs=[word_input, gru_state_input, encoder_output],
|
89 |
+
outputs=[decoder_output, gru_state],
|
90 |
+
)
|
91 |
+
# **** DECODER ****
|
92 |
|
93 |
+
# **** ENCODER ****
|
94 |
+
image_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
|
95 |
+
encoder_output = Dense(ATTENTION_DIM, activation="relu")(x)
|
96 |
+
encoder = tf.keras.Model(inputs=image_input, outputs=encoder_output)
|
97 |
+
# **** ENCODER ****
|
98 |
|
99 |
st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
|
100 |
image = Image.open('./title.jpg')
|
|
|
137 |
resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
|
138 |
img = resize/255
|
139 |
|
140 |
+
# encoder = load_encoder_model()
|
141 |
features = encoder(tf.expand_dims(img, axis=0))
|
142 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
143 |
result = []
|
144 |
+
# decoder_pred_model = load_decoder_model()
|
145 |
for i in range(MAX_CAPTION_LEN):
|
146 |
predictions, gru_state = decoder_pred_model(
|
147 |
[dec_input, gru_state, features]
|