Spaces:

nishantguvvada
/

Image-Captioning

Sleeping

App Files Files Community

nishantguvvada committed on Dec 25, 2023

Commit

ec5678b

1 Parent(s): fba3e16

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -5

app.py CHANGED Viewed

@@ -37,10 +37,60 @@ def load_image_model():
     image_model=tf.keras.models.load_model('./image_caption_model.h5')
     return image_model
-@st.cache_resource()
-def load_decoder_model():
-    decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
-    return decoder_model
 # @st.cache_resource()
 # def load_encoder_model():
@@ -107,7 +157,7 @@ def predict_caption(file):
     features = encoder(tf.expand_dims(img, axis=0))
     dec_input = tf.expand_dims([word_to_index("<start>")], 1)
     result = []
-    decoder_pred_model = load_decoder_model()
     for i in range(MAX_CAPTION_LEN):
         predictions, gru_state = decoder_pred_model(
             [dec_input, gru_state, features]

     image_model=tf.keras.models.load_model('./image_caption_model.h5')
     return image_model
+# @st.cache_resource()
+# def load_decoder_model():
+#     decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
+#     return decoder_model
+word_input = Input(shape=(MAX_CAPTION_LEN), name="words")
+embed_x = Embedding(VOCAB_SIZE, ATTENTION_DIM)(word_input)
+decoder_gru = GRU(
+    ATTENTION_DIM,
+    return_sequences=True,
+    return_state=True,
+)
+gru_output, gru_state = decoder_gru(embed_x)
+decoder_attention = Attention()
+context_vector = decoder_attention([gru_output, encoder_output])
+addition = Add()([gru_output, context_vector])
+layer_norm = LayerNormalization(axis=-1)
+layer_norm_out = layer_norm(addition)
+decoder_output_dense = Dense(VOCAB_SIZE)
+decoder_output = decoder_output_dense(layer_norm_out)
+decoder = tf.keras.Model(
+    inputs=[word_input, encoder_output], outputs=decoder_output
+)
+gru_state_input = Input(shape=(ATTENTION_DIM), name="gru_state_input")
+# Reuse trained GRU, but update it so that it can receive states.
+gru_output, gru_state = decoder_gru(embed_x, initial_state=gru_state_input)
+# Reuse other layers as well
+context_vector = decoder_attention([gru_output, encoder_output])
+addition_output = Add()([gru_output, context_vector])
+layer_norm_output = layer_norm(addition_output)
+decoder_output = decoder_output_dense(layer_norm_output)
+# Define prediction Model with state input and output
+decoder_pred_model = tf.keras.Model(
+    inputs=[word_input, gru_state_input, encoder_output],
+    outputs=[decoder_output, gru_state],
+)
 # @st.cache_resource()
 # def load_encoder_model():
     features = encoder(tf.expand_dims(img, axis=0))
     dec_input = tf.expand_dims([word_to_index("<start>")], 1)
     result = []
+    # decoder_pred_model = load_decoder_model()
     for i in range(MAX_CAPTION_LEN):
         predictions, gru_state = decoder_pred_model(
             [dec_input, gru_state, features]