Add custom RNN model with attention

Browse files

Files changed (8) hide show

.gitattributes +1 -0
config.json +1 -0
modeling.py +51 -0
pytorch_model.bin +3 -0
sentencepiece.bpe.model +3 -0
special_tokens_map.json +15 -0
tokenizer.json +3 -0
tokenizer_config.json +54 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"model_type": "custom_rnn", "vocab_size": 250002, "hidden_size": 256, "output_size": 2, "cell_type": "RNN", "architecture": "SimpleRecurrentNetworkWithAttention"}

modeling.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import torch
+import torch.nn as nn
class Attention(nn.Module):
    """Additive attention that pools a sequence of states into one vector.

    Each time step ``t`` is scored as ``v(tanh(W1(states[t]) + W2(query)))``;
    the scores are soft-maxed over the time axis and used to take a weighted
    sum of the states.

    Args:
        hidden_size: Feature size shared by the query and the sequence states.
    """

    def __init__(self, hidden_size):
        super().__init__()
        # Attribute names are part of the saved state dict; keep them stable.
        self.W1 = nn.Linear(hidden_size, hidden_size)
        self.W2 = nn.Linear(hidden_size, hidden_size)
        self.v = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, hidden, encoder_outputs, mask=None):
        """Compute the attention-weighted summary of ``encoder_outputs``.

        Args:
            hidden: Query tensor of shape ``(batch, hidden_size)``.
            encoder_outputs: Sequence states of shape
                ``(batch, seq_len, hidden_size)``.
            mask: Optional ``(batch, seq_len)`` tensor; zero/False entries
                mark positions (e.g. padding) that must receive no attention.
                ``None`` attends over every position.

        Returns:
            A tuple ``(context, attention_weights)`` with shapes
            ``(batch, hidden_size)`` and ``(batch, seq_len)``.
        """
        # Project the query once and let broadcasting expand it over time,
        # instead of repeating it seq_len times before the linear layer.
        query = self.W2(hidden).unsqueeze(1)
        energy = torch.tanh(self.W1(encoder_outputs) + query)
        scores = self.v(energy).squeeze(2)
        if mask is not None:
            keep = mask.to(device=scores.device, dtype=torch.bool)
            # Use the dtype's most negative finite value rather than -inf so
            # that a fully masked row yields uniform weights instead of NaN.
            scores = scores.masked_fill(~keep, torch.finfo(scores.dtype).min)
        attention_weights = torch.softmax(scores, dim=1)
        context = torch.bmm(attention_weights.unsqueeze(1), encoder_outputs).squeeze(1)
        return context, attention_weights


class SimpleRecurrentNetworkWithAttention(nn.Module):
    """Bidirectional recurrent classifier with attention pooling.

    Token ids are embedded, run through a single-layer bidirectional
    RNN/GRU/LSTM, and the per-step outputs are pooled by ``Attention`` using
    the concatenated final forward/backward hidden states as the query. A
    linear layer maps the pooled vector to ``output_size`` logits.

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
        hidden_size: Embedding size and per-direction recurrent state size.
        output_size: Number of output logits.
        cell_type: ``'LSTM'`` or ``'GRU'``; any other value selects a plain
            ``nn.RNN``.
        device: Stored on ``self.device`` for backward compatibility only.
            The module is not moved to it, and ``forward`` no longer relies
            on it.
    """

    def __init__(self, input_size, hidden_size, output_size, cell_type='RNN', device='cpu'):
        super().__init__()
        self.device = device
        self.embedding = nn.Embedding(input_size, hidden_size)
        # The recurrent layer is bidirectional, so its features are 2x wide.
        self.attention = Attention(hidden_size * 2)
        recurrent_cls = {'LSTM': nn.LSTM, 'GRU': nn.GRU}.get(cell_type, nn.RNN)
        self.rnn = recurrent_cls(hidden_size, hidden_size, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, inputs, attention_mask=None):
        """Classify a batch of token-id sequences.

        Args:
            inputs: Integer tensor of token ids, shape ``(batch, seq_len)``.
            attention_mask: Optional ``(batch, seq_len)`` tensor whose
                zero/False entries mark padding to be excluded from the
                attention pooling. The recurrent layer itself still runs over
                the padded positions.

        Returns:
            A tuple ``(output, attention_weights)`` with shapes
            ``(batch, output_size)`` and ``(batch, seq_len)``.
        """
        # Follow the parameters' real device: self.device is only the
        # constructor argument and goes stale after ``model.to(...)``.
        param_device = self.embedding.weight.device
        embedded = self.embedding(inputs.to(param_device))
        rnn_output, hidden = self.rnn(embedded)
        if isinstance(hidden, tuple):
            # LSTM returns (h_n, c_n); only the hidden state is used.
            hidden = hidden[0]
        # Last two entries of h_n are the forward and backward final states.
        hidden = torch.cat((hidden[-2], hidden[-1]), dim=1)
        context, attention_weights = self.attention(hidden, rnn_output, attention_mask)
        output = self.fc(context)
        return output, attention_weights

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:213c6e85e32e29debb0eba7922836aa1ec1348d5d26c9629df9463d726aa6957
+size 259166474

sentencepiece.bpe.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ffb37461c391f096759f4a9bbbc329da0f36952f88bab061fcf84940c022e98
+size 17082999

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
+}