guillermoruiz committed · Commit fc80b92 · verified · Parent: 25fb7e3

Upload TFBilma

Files changed (4):
  1. config.json +4 -7
  2. configuration_bilma.py +54 -0
  3. modeling_bilma.py +441 -0
  4. tf_model.h5 +1 -1
config.json CHANGED
@@ -1,20 +1,17 @@
 {
-  "_name_or_path": "bilma_MX_mean",
   "add_head": [
     512,
     15
   ],
+  "architectures": [
+    "Bilma"
+  ],
   "auto_map": {
     "AutoConfig": "configuration_bilma.BilmaConfig",
-    "TFAutoModel": "modeling_bilma.TFBilma",
-    "TFAutoModelForMaskedLM": "modeling_bilma.TFBilma"
+    "TFAutoModel": "modeling_bilma.TFBilma"
   },
   "hidden_dropout_prob": 0.1,
   "hidden_size": 512,
-  "include_head": [
-    512,
-    15
-  ],
   "include_top": false,
   "model_type": "bilma",
   "num_attention_heads": 4,
configuration_bilma.py ADDED
@@ -0,0 +1,54 @@
+from transformers import PretrainedConfig
+
+class BilmaConfig(PretrainedConfig):
+    model_type = "bilma"
+
+    def __init__(
+        self,
+        weights="MX",
+        include_top=True,
+        add_head=None,
+        pooling=None,
+        num_attention_heads: int = 4,
+        num_hidden_layers: int = 2,
+        seq_max_length: int = 280,
+        hidden_size: int = 512,
+        vocab_size: int = 29025,
+        hidden_dropout_prob: float = 0.1,
+        **kwargs,
+    ):
+        countries = ["MX"]
+        poolings = ["mean", "cls", "max"]
+        if weights not in countries:
+            raise ValueError(f"`weights` must be one of {countries}, got {weights}.")
+        if add_head is not None and include_top:
+            raise ValueError("To add a head, `include_top` must be False.")
+        if pooling is not None and include_top:
+            raise ValueError("To specify a pooling, `include_top` must be False.")
+        if pooling is not None and pooling not in poolings:
+            raise ValueError(f"`pooling` must be one of {poolings}, got {pooling}.")
+        if weights is not None:  # pretrained weights pin the architecture hyperparameters
+            self.weights = weights
+            self.include_top = include_top
+            self.add_head = add_head
+            self.pooling = pooling
+            self.num_attention_heads = 4
+            self.num_hidden_layers = 2
+            self.seq_max_length = 280
+            self.hidden_size = 512
+            self.vocab_size = 29025
+            self.hidden_dropout_prob = 0.1
+            super().__init__(**kwargs)
+            return
+
+        self.weights = weights
+        self.include_top = include_top
+        self.add_head = add_head
+        self.pooling = pooling
+        self.num_attention_heads = num_attention_heads
+        self.num_hidden_layers = num_hidden_layers
+        self.seq_max_length = seq_max_length
+        self.hidden_size = hidden_size
+        self.vocab_size = vocab_size
+        self.hidden_dropout_prob = hidden_dropout_prob
+        super().__init__(**kwargs)
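
As a quick check of the validation rules in `BilmaConfig.__init__`: a head or a pooling may only be set when `include_top=False`, and with `weights="MX"` the architecture hyperparameters are pinned to the pretrained values. A minimal sketch, assuming `configuration_bilma.py` is importable locally:

    from configuration_bilma import BilmaConfig

    # include_top must be False whenever add_head or pooling is given,
    # otherwise __init__ raises ValueError.
    config = BilmaConfig(weights="MX", include_top=False, pooling="mean", add_head=[512, 15])
    print(config.hidden_size, config.seq_max_length)  # 512 280, pinned by the MX weights
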
modeling_bilma.py ADDED
@@ -0,0 +1,441 @@
+from transformers import TFPreTrainedModel, PreTrainedTokenizer, BatchEncoding
+
+from tensorflow.keras.models import Model, load_model, Sequential
+from tensorflow.keras.layers import Layer, Dense, concatenate, Input, add, Dropout, LayerNormalization, MultiHeadAttention, Embedding
+import tensorflow as tf
+import numpy as np
+
+from typing import Dict
+
+import re
+import unicodedata
+
+from configuration_bilma import BilmaConfig
+
+# copied from preprocessing.py
+BLANK = ' '
+
+RE_OPS = re.I | re.M | re.S
+RE_USR = re.compile(r"""@\S+""", RE_OPS)
+RE_TAG = re.compile(r"""#\S+""", RE_OPS)
+RE_URL = re.compile(r"""(http|ftp|https)://\S+""", RE_OPS)
+RE_NUM = re.compile(r"""[-+]?\d+\.?\d*""", RE_OPS)
+
+SYMBOLS_ = "()[]¿?¡!{}~<>|"
+SYMBOLS = set(";:,.@\\-\"/" + SYMBOLS_)
+
+
+
+# ------------------
+# Class declaration
+# ------------------
+
+
+class TFBilma(TFPreTrainedModel):
+    config_class = BilmaConfig
+    main_input_name = "input_ids"
+    #base_model_prefix = "bilma"
+
+    def __init__(self, config):
+        self.seq_max_length = config.seq_max_length
+        self.include_top = config.include_top
+        self.add_head = config.add_head
+        super().__init__(config)
+
+        self.model = bilma(num_enc=config.num_hidden_layers,
+                           embed_dim=config.hidden_size,
+                           max_length=config.seq_max_length,
+                           num_heads=config.num_attention_heads,
+                           ff_dim=config.hidden_size,
+                           vocab_size=config.vocab_size,
+                           rate=config.hidden_dropout_prob,
+                           include_top=config.include_top,
+                           add_head=config.add_head,
+                           pooling=config.pooling)
+
+    @property
+    def dummy_inputs(self) -> Dict[str, tf.Tensor]:
+
+        dummies = {}
+        for key, spec in self.input_signature.items():
+            dummy_shape = [dim if dim is not None else 2 for dim in spec.shape]
+            if spec.shape[0] is None:
+                dummy_shape[0] = 1
+            dummies[key] = tf.ones(shape=dummy_shape, dtype=spec.dtype)
+
+
+        return dummies
+
+    @property
+    def input_signature(self) -> Dict[str, tf.TensorSpec]:
+        sig = {}
+        sig["input_ids"] = tf.TensorSpec([None, self.seq_max_length], tf.int32, name="input_ids")
+        return sig
+
+
+    def call(self, inputs):
+        if isinstance(inputs, Dict) or isinstance(inputs, BatchEncoding):
+            ins = tf.cast(inputs["input_ids"], tf.float32)
+        else:
+            ins = inputs
+        if self.include_top:
+            output = {"logits": self.model(ins)}
+        else:
+            if self.add_head is None:
+                output = {"last_hidden_state": self.model(ins)}
+            else:
+                output = {"label": self.model(ins)}
+        return output
+
+    def get_loss_function(self):
+        return loss_function()
+
+    def get_acc_function(self):
+        return accuracy_function()
+
+
+# copied from bilma_model.py
+# --------------------------
+
+def loss_function(ignore_id=0):
+    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
+    def loss(real, pred):
+        mask = tf.math.logical_not(tf.math.equal(real, ignore_id))
+        loss_ = loss_object(real, pred)
+        mask = tf.cast(mask, dtype=loss_.dtype)
+        loss_ *= mask
+        sum_ = tf.reduce_sum(mask, axis=1)
+
+        loss_ = tf.math.divide_no_nan(tf.reduce_sum(loss_, axis=1), sum_)
+        return loss_
+    return loss
+
+def accuracy_function(ignore_id=0):
+    def acc_mlm(real, pred):
+        accuracies = tf.equal(tf.cast(real, tf.int64), tf.argmax(pred, axis=2))
+
+        mask = tf.math.logical_not(tf.math.equal(real, ignore_id))
+        accuracies = tf.math.logical_and(mask, accuracies)
+
+        accuracies = tf.cast(accuracies, dtype=tf.float32)
+        mask = tf.cast(mask, dtype=tf.float32)
+        return tf.math.divide_no_nan(tf.reduce_sum(accuracies), tf.reduce_sum(mask))
+    return acc_mlm
+
+def mean_vectors(inputs, enc_vectors, max_length):
+    p = tf.where(inputs == 3)  # positions of token id 3, taken as the end-of-sequence marker
+    pos = tf.transpose(p)[1]
+    C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
+    C = tf.reshape(C, (-1, max_length, 1))
+    S = tf.reduce_sum(enc_vectors * C, 1)
+    x = S / tf.expand_dims(tf.cast(pos, tf.float32), (1))
+    return x
+
+def mean_diff_vectors(inputs, enc_vectors, max_length):
+    p = tf.where(inputs == 3)
+    pos = tf.transpose(p)[1]
+    C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
+    C = tf.reshape(C, (-1, max_length, 1))
+    vecs = enc_vectors * C
+    S = tf.reduce_sum(vecs, 1)
+    mu = S / tf.expand_dims(tf.cast(pos, tf.float32), (1))
+    x = tf.reduce_sum(mu - vecs, 1) / tf.expand_dims(tf.cast(pos, tf.float32), (1))
+    return x
+
+def max_vectors(inputs, enc_vectors, max_length):
+    p = tf.where(inputs == 3)
+    pos = tf.transpose(p)[1]
+    C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
+    C = tf.reshape(C, (-1, max_length, 1))
+    x = tf.reduce_max(enc_vectors * C, 1)
+    return x
+
+def cls_vectors(inputs, enc_vectors, max_length):
+    x = tf.squeeze(enc_vectors[:, 0:1, :], axis=1)
+    return x
+
+
+def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True, add_head=None, pooling=None):
+    capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
+    capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
+    capt_inputs = capt_embedding(capt_inputs_ids)
+
+    enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate, name="bilma/encoder")
+    enc_output = enc(capt_inputs)
+    if include_top:
+        fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
+    else:
+        x = enc_output
+        if pooling == "mean":
+            x = mean_vectors(capt_inputs_ids, x, max_length)
+        elif pooling == "cls":
+            x = cls_vectors(capt_inputs_ids, x, max_length)
+        elif pooling == "max":
+            x = max_vectors(capt_inputs_ids, x, max_length)
+
+        if add_head is None:
+            fin_output = x
+        else:
+            for i, m in enumerate(add_head[:-1]):
+                x = Dense(m, use_bias=True, activation="relu", name=f"bilma/dense_ex_{i}")(x)
+            fin_output = Dense(add_head[-1], use_bias=True, activation="softmax", name="bilma/dense_ex_final")(x)
+
+    caption_model = Model(inputs=capt_inputs_ids, outputs=fin_output, name="bilma_model")
+    return caption_model
+
+def load(model_file):
+    custom_objects = {"EncoderBlock": EncoderBlock,
+                      "Encoder": Encoder,
+                      "loss": loss_function(),
+                      "acc_mlm": accuracy_function(),
+                      }
+    return load_model(model_file, custom_objects=custom_objects)
+
+
+#
+# Copied from transformer_text.py
+# -------------------------------
+class EncoderBlock(Layer):
+    def __init__(self, layer_num, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
+        super(EncoderBlock, self).__init__(**kwargs)
+        self.ln = layer_num
+        self.p_d = patch_dim
+        self.n_h = num_heads
+        self.f_d = ff_dim
+        self.rate = rate
+
+        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=patch_dim, name=f"bilma/MHA_{layer_num}")
+        self.ffn = Sequential(
+            #[Conv1D(ff_dim, kernel_size=1, activation=tf.nn.gelu),
+            # Conv1D(patch_dim, kernel_size=1),]
+            [Dense(ff_dim, activation=tf.nn.gelu, name=f"bilma/dense1_{layer_num}"),
+             Dense(patch_dim, name=f"bilma/dense2_{layer_num}")]
+        )
+        #self.layernorm0 = LayerNormalization(epsilon=1e-6)
+        self.layernorm1 = LayerNormalization(epsilon=1e-6, name=f"ln1_{layer_num}")
+        self.layernorm2 = LayerNormalization(epsilon=1e-6, name=f"ln2_{layer_num}")
+        self.dropout1 = Dropout(rate)
+        self.dropout2 = Dropout(rate)
+
+    def get_config(self):
+        config = super(EncoderBlock, self).get_config()
+        config.update({"layer_num": self.ln, "patch_dim": self.p_d, "num_heads": self.n_h, "ff_dim": self.f_d, "rate": self.rate})
+        return config
+
+    def call(self, inputs, training=False):
+        #inputs = self.layernorm0(inputs)
+        attn_output = self.att(inputs, inputs)
+        attn_output = self.dropout1(attn_output, training=training)
+        out1 = self.layernorm1(add([inputs, attn_output]))
+        ffn_output = self.ffn(out1)
+        ffn_output = self.dropout2(ffn_output, training=training)
+        return self.layernorm2(add([out1, ffn_output]))
+
+
+class DecoderBlock(Layer):
+    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
+        super(DecoderBlock, self).__init__(**kwargs)
+        self.e_d = embed_dim
+        self.n_h = num_heads
+        self.f_d = ff_dim
+        self.rate = rate
+
+        self.att1 = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
+        self.att2 = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
+        self.ffn = Sequential(
+            #[Conv1D(ff_dim, kernel_size=1, activation=tf.nn.gelu),
+            # Conv1D(embed_dim, kernel_size=1),]
+            [Dense(ff_dim, activation=tf.nn.gelu),
+             Dense(embed_dim)]
+        )
+        self.layernorm1 = LayerNormalization(epsilon=1e-6)
+        self.layernorm2 = LayerNormalization(epsilon=1e-6)
+        self.dropout1 = Dropout(rate)
+        self.dropout2 = Dropout(rate)
+        self.dropout3 = Dropout(rate)
+
+    def get_config(self):
+        config = super(DecoderBlock, self).get_config()
+        config.update({"embed_dim": self.e_d, "num_heads": self.n_h, "ff_dim": self.f_d, "rate": self.rate})
+        return config
+
+    def call(self, inputs, encoder_output, look_ahead_mask, padding_mask, training=None):
+        y, attn_output1 = self.att1(inputs, inputs, attention_mask=look_ahead_mask, return_attention_scores=True)
+        y = self.dropout1(y, training=training)
+        y = add([inputs, y])
+        out1 = self.layernorm1(y)
+
+        y, attn_encoder = self.att2(out1, encoder_output, attention_mask=padding_mask, return_attention_scores=True)
+        y = self.dropout2(y, training=training)
+        y = add([out1, y])
+        out2 = self.layernorm1(y)
+
+        ffn_output = self.ffn(out2)
+        ffn_output = self.dropout3(ffn_output, training=training)
+        final_output = self.layernorm2(out2 + ffn_output)
+
+        return final_output, attn_output1, attn_encoder
+
+class Encoder(Layer):
+    def __init__(self, n, embed_dim, max_length, num_heads, ff_dim, rate=0.1, **kwargs):
+        super(Encoder, self).__init__(**kwargs)
+        self.n = n
+        self.embed_dim = embed_dim
+        self.max_length = max_length
+        self.n_h = num_heads
+        self.f_d = ff_dim
+        self.rate = rate
+        self._layers = [EncoderBlock(i, embed_dim, num_heads, ff_dim, rate=0.1, name=f"enc_block_{i}") for i in range(n)]
+        self.pe = positional_encoding(self.max_length, self.embed_dim)
+
+    def get_config(self):
+        config = super(Encoder, self).get_config()
+        config.update({"n": self.n, "embed_dim": self.embed_dim, "max_length": self.max_length, "num_heads": self.n_h, "ff_dim": self.f_d, "rate": self.rate})
+        return config
+
+    def call(self, x, training=False):
+        x *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))
+        x = x + self.pe[:, :tf.shape(x)[1], :]
+        for layer in self._layers:
+            x = layer(x, training)
+        return x
+
+
+class Decoder(Layer):
+    def __init__(self, n, embed_dim, max_length, num_heads, ff_dim, rate=0.1, **kwargs):
+        super(Decoder, self).__init__(**kwargs)
+        self.n = n
+        self.embed_dim = embed_dim
+        self.max_length = max_length
+        self.n_h = num_heads
+        self.f_d = ff_dim
+        self.rate = rate
+        self._layers = [DecoderBlock(embed_dim, num_heads, ff_dim, rate=0.1) for _ in range(n)]
+        self.pe = positional_encoding(self.max_length, self.embed_dim)
+
+    def get_config(self):
+        config = super(Decoder, self).get_config()
+        config.update({"n": self.n, "embed_dim": self.embed_dim, "max_length": self.max_length, "num_heads": self.n_h, "ff_dim": self.f_d, "rate": self.rate})
+        return config
+
+    def call(self, x, encoder_output, look_ahead_mask, padding_mask, training):
+        x *= tf.math.sqrt(tf.cast(self.embed_dim, tf.float32))
+        x = x + self.pe[:, :tf.shape(x)[1], :]
+
+        for layer in self._layers:
+            x, self_att, enc_att = layer(x, encoder_output, look_ahead_mask, padding_mask, training)
+
+        return x
+
+
+
+
+# =========================================
+# M A S K S
+# =========================================
+def create_padding_mask(seq):
+    """
+    For self-attention
+    seq shape(bs, max_length, emb_dim)
+    output shape (bs, max_length, max_length)
+    """
+    mask = tf.cast(tf.not_equal(seq, 0), tf.bool)
+    mask = tf.reduce_any(mask, 2)
+    mask = tf.repeat(mask, seq.shape[1], 0)
+    mask = tf.reshape(mask, (-1, seq.shape[1], seq.shape[1]))
+    return tf.cast(mask, tf.float32)
+
+
+def create_cross_padding_mask(seq, target_seq):
+    """
+    For cross-attention
+    seq shape(bs, k, image_features)
+    target_seq(bs, max_length, emb_dim)
+    output shape (bs, max_length, k)
+    """
+    mask = tf.cast(tf.not_equal(target_seq, 0), tf.bool)
+    mask = tf.reduce_any(mask, 2)
+    mask = tf.repeat(mask, seq.shape[1], 0)
+    mask = tf.reshape(mask, (-1, tf.shape(seq)[1], tf.shape(target_seq)[1]))
+    mask = tf.transpose(mask, [0, 2, 1])
+    return mask
+
+
+def create_look_ahead_mask(seq):
+    """
+    seq shape(bs, max_length, emb_dim)
+    output mask of shape (bs, max_length, max_length) with ones on the diagonal and below.
+    """
+    size = seq.shape[1]
+    mask = tf.linalg.band_part(tf.ones((size, size)), -1, 0)
+    mask = tf.expand_dims(mask, 0)
+    mask = tf.repeat(mask, tf.shape(seq)[0], 0)
+    return mask
+
+
+def create_masks(seq, target_seq):
+    decoder_mask = create_padding_mask(target_seq)
+    decoder_mask *= create_look_ahead_mask(target_seq)
+    cross_att_mask = create_cross_padding_mask(seq, target_seq)
+    return decoder_mask, cross_att_mask
+
+
+def create_masks_looking_ahead(seq, target_seq):
+    decoder_mask = create_padding_mask(target_seq)
+    cross_att_mask = create_cross_padding_mask(seq, target_seq)
+    return decoder_mask, cross_att_mask
+
+# =========================================
+# P O S I T I O N A L   E N C O D I N G
+# =========================================
+def get_angles(pos, i, d_model):
+    angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))
+    return pos * angle_rates
+
+@tf.autograph.experimental.do_not_convert
+def positional_encoding(position, d_model):
+    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
+                            np.arange(d_model)[np.newaxis, :],
+                            d_model)
+
+    # apply sin to even indices in the array; 2i
+    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
+
+    # apply cos to odd indices in the array; 2i+1
+    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
+
+    pos_encoding = angle_rads[np.newaxis, ...]
+
+    return tf.cast(pos_encoding, dtype=tf.float32)
+
+class PatchEncoder(Layer):
+    def __init__(self, num_patches, projection_dim, **kwargs):
+        super(PatchEncoder, self).__init__(**kwargs)
+        self.num_patches = num_patches
+        self.projection_dim = projection_dim
+        self.projection = Dense(units=projection_dim)
+        self.position_embedding = Embedding(
+            input_dim=num_patches, output_dim=projection_dim
+        )
+
+    def get_config(self):
+        config = super(PatchEncoder, self).get_config()
+        config.update({"num_patches": self.num_patches, "projection_dim": self.projection_dim})
+        return config
+
+    def call(self, patch):
+        positions = tf.range(start=0, limit=self.num_patches, delta=1)
+        encoded = self.projection(patch) + self.position_embedding(positions)
+        return encoded
+
+
+
+
+
+
+
+
+
+
+
+
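
Depending on the configuration, `TFBilma.call` returns a dict keyed `logits` (with the MLM top), `last_hidden_state` (no top, no head), or `label` (extra head). A minimal forward-pass sketch with random token ids, assuming both committed files are importable locally; the weights here are freshly initialized, for illustration only:

    import tensorflow as tf
    from configuration_bilma import BilmaConfig
    from modeling_bilma import TFBilma

    config = BilmaConfig(weights="MX")   # include_top=True -> masked-LM logits
    model = TFBilma(config)
    ids = tf.random.uniform((1, config.seq_max_length), maxval=config.vocab_size, dtype=tf.int32)
    out = model({"input_ids": ids})
    print(out["logits"].shape)           # expected (1, 280, 29025)
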
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7ebeece34f34d796b0b483adcfc34d966b6829c284d0ff8be986dd86e7313b9
+oid sha256:3adf655ed163d94a182df5698d86300ea8a65593bc3e3502dfbca39940ad2fa9
 size 98400100