Spaces:
Sleeping
Sleeping
Commit
·
13147dd
1
Parent(s):
3632e62
Update app.py
Browse files
app.py
CHANGED
@@ -37,20 +37,15 @@ def load_image_model():
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
# @st.cache_resource()
|
50 |
-
# def load_encoder_model():
|
51 |
-
# encoder=tf.keras.models.load_model('./encoder_model.h5')
|
52 |
-
# return encoder
|
53 |
|
|
|
|
|
|
|
|
|
54 |
|
55 |
@st.cache_resource()
|
56 |
def feature_extractor():
|
@@ -67,56 +62,6 @@ encoder_output = Dense(ATTENTION_DIM, activation="relu")(x)
|
|
67 |
encoder = tf.keras.Model(inputs=image_input, outputs=encoder_output)
|
68 |
|
69 |
|
70 |
-
#****
|
71 |
-
word_input = Input(shape=(MAX_CAPTION_LEN), name="words")
|
72 |
-
embed_x = Embedding(VOCAB_SIZE, ATTENTION_DIM)(word_input)
|
73 |
-
|
74 |
-
decoder_gru = GRU(
|
75 |
-
ATTENTION_DIM,
|
76 |
-
return_sequences=True,
|
77 |
-
return_state=True,
|
78 |
-
)
|
79 |
-
gru_output, gru_state = decoder_gru(embed_x)
|
80 |
-
|
81 |
-
decoder_attention = Attention()
|
82 |
-
context_vector = decoder_attention([gru_output, encoder_output])
|
83 |
-
|
84 |
-
addition = Add()([gru_output, context_vector])
|
85 |
-
|
86 |
-
layer_norm = LayerNormalization(axis=-1)
|
87 |
-
layer_norm_out = layer_norm(addition)
|
88 |
-
|
89 |
-
decoder_output_dense = Dense(VOCAB_SIZE)
|
90 |
-
decoder_output = decoder_output_dense(layer_norm_out)
|
91 |
-
|
92 |
-
|
93 |
-
decoder = tf.keras.Model(
|
94 |
-
inputs=[word_input, encoder_output], outputs=decoder_output
|
95 |
-
)
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
gru_state_input = Input(shape=(ATTENTION_DIM), name="gru_state_input")
|
100 |
-
|
101 |
-
# Reuse trained GRU, but update it so that it can receive states.
|
102 |
-
gru_output, gru_state = decoder_gru(embed_x, initial_state=gru_state_input)
|
103 |
-
|
104 |
-
# Reuse other layers as well
|
105 |
-
context_vector = decoder_attention([gru_output, encoder_output])
|
106 |
-
addition_output = Add()([gru_output, context_vector])
|
107 |
-
layer_norm_output = layer_norm(addition_output)
|
108 |
-
|
109 |
-
decoder_output = decoder_output_dense(layer_norm_output)
|
110 |
-
|
111 |
-
# Define prediction Model with state input and output
|
112 |
-
decoder_pred_model = tf.keras.Model(
|
113 |
-
inputs=[word_input, gru_state_input, encoder_output],
|
114 |
-
outputs=[decoder_output, gru_state],
|
115 |
-
)
|
116 |
-
# ****
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
|
121 |
st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
|
122 |
image = Image.open('./title.jpg')
|
@@ -159,11 +104,11 @@ def predict_caption(file):
|
|
159 |
resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
|
160 |
img = resize/255
|
161 |
|
162 |
-
|
163 |
features = encoder(tf.expand_dims(img, axis=0))
|
164 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
165 |
result = []
|
166 |
-
|
167 |
for i in range(MAX_CAPTION_LEN):
|
168 |
predictions, gru_state = decoder_pred_model(
|
169 |
[dec_input, gru_state, features]
|
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
+
@st.cache_resource()
|
41 |
+
def load_decoder_model():
|
42 |
+
decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
|
43 |
+
return decoder_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
@st.cache_resource()
|
46 |
+
def load_encoder_model():
|
47 |
+
encoder=tf.keras.models.load_model('./encoder_model.h5')
|
48 |
+
return encoder
|
49 |
|
50 |
@st.cache_resource()
|
51 |
def feature_extractor():
|
|
|
62 |
encoder = tf.keras.Model(inputs=image_input, outputs=encoder_output)
|
63 |
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
|
67 |
image = Image.open('./title.jpg')
|
|
|
104 |
resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
|
105 |
img = resize/255
|
106 |
|
107 |
+
encoder = load_encoder_model()
|
108 |
features = encoder(tf.expand_dims(img, axis=0))
|
109 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
110 |
result = []
|
111 |
+
decoder_pred_model = load_decoder_model()
|
112 |
for i in range(MAX_CAPTION_LEN):
|
113 |
predictions, gru_state = decoder_pred_model(
|
114 |
[dec_input, gru_state, features]
|