Spaces:
Sleeping
Sleeping
Commit
·
ec5678b
1
Parent(s):
fba3e16
Update app.py
Browse files
app.py
CHANGED
@@ -37,10 +37,60 @@ def load_image_model():
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
-
@st.cache_resource()
|
41 |
-
def load_decoder_model():
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# @st.cache_resource()
|
46 |
# def load_encoder_model():
|
@@ -107,7 +157,7 @@ def predict_caption(file):
|
|
107 |
features = encoder(tf.expand_dims(img, axis=0))
|
108 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
109 |
result = []
|
110 |
-
decoder_pred_model = load_decoder_model()
|
111 |
for i in range(MAX_CAPTION_LEN):
|
112 |
predictions, gru_state = decoder_pred_model(
|
113 |
[dec_input, gru_state, features]
|
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
+
# @st.cache_resource()
|
41 |
+
# def load_decoder_model():
|
42 |
+
# decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
|
43 |
+
# return decoder_model
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
# ---------------------------------------------------------------------------
# Decoder graphs, built at import time.
#
# Two Keras functional models are assembled from the SAME layer objects:
#   * `decoder`            - takes [word_input, encoder_output] and returns
#                            the Dense(VOCAB_SIZE) output.
#   * `decoder_pred_model` - additionally takes an explicit GRU state input
#                            and returns the new GRU state, so a caller can
#                            feed the state back in on the next call.
#
# NOTE(review): `encoder_output`, `MAX_CAPTION_LEN`, `VOCAB_SIZE`,
# `ATTENTION_DIM` and the Keras layer classes are not defined in this span;
# presumably they are defined/imported earlier in the file - confirm.
# NOTE(review): no weights are loaded in this span; confirm the trained
# weights are restored elsewhere before `decoder_pred_model` is used.
# ---------------------------------------------------------------------------

# `shape` must be a tuple: `(N)` is just the int N, `(N,)` is a 1-tuple.
word_input = Input(shape=(MAX_CAPTION_LEN,), name="words")
embed_x = Embedding(VOCAB_SIZE, ATTENTION_DIM)(word_input)

decoder_gru = GRU(
    ATTENTION_DIM,
    return_sequences=True,
    return_state=True,
)
gru_output, gru_state = decoder_gru(embed_x)

# Attention with the GRU output as query and the encoder output as value.
decoder_attention = Attention()
context_vector = decoder_attention([gru_output, encoder_output])

# Residual connection: GRU output + attention context.
addition = Add()([gru_output, context_vector])

layer_norm = LayerNormalization(axis=-1)
layer_norm_out = layer_norm(addition)

# Dense layer with one output unit per vocabulary entry (no activation set).
decoder_output_dense = Dense(VOCAB_SIZE)
decoder_output = decoder_output_dense(layer_norm_out)

decoder = tf.keras.Model(
    inputs=[word_input, encoder_output], outputs=decoder_output
)

# Explicit GRU state input for the prediction model (1-tuple shape, see above).
gru_state_input = Input(shape=(ATTENTION_DIM,), name="gru_state_input")

# Reuse trained GRU, but update it so that it can receive states.
gru_output, gru_state = decoder_gru(embed_x, initial_state=gru_state_input)

# Reuse other layers as well. `Add` is instantiated again here rather than
# reused; only the attention, layer-norm and dense layer objects are shared.
context_vector = decoder_attention([gru_output, encoder_output])
addition_output = Add()([gru_output, context_vector])
layer_norm_output = layer_norm(addition_output)

decoder_output = decoder_output_dense(layer_norm_output)

# Define prediction Model with state input and output
decoder_pred_model = tf.keras.Model(
    inputs=[word_input, gru_state_input, encoder_output],
    outputs=[decoder_output, gru_state],
)
|
92 |
+
|
93 |
+
|
94 |
|
95 |
# @st.cache_resource()
|
96 |
# def load_encoder_model():
|
|
|
157 |
features = encoder(tf.expand_dims(img, axis=0))
|
158 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
159 |
result = []
|
160 |
+
# decoder_pred_model = load_decoder_model()
|
161 |
for i in range(MAX_CAPTION_LEN):
|
162 |
predictions, gru_state = decoder_pred_model(
|
163 |
[dec_input, gru_state, features]
|