nnmthuw committed · Commit 60b414a · Parent: 9c2ca71

commit all

app.py CHANGED
@@ -314,25 +314,27 @@ HID_DIM = 512
 
 # Load our Model Translation
 ENCODER = EncoderAtt(INPUT_DIM, HID_DIM)
-#ENCODER.load_state_dict(torch.load("hid512_encoder_att_epoch_20.pt"), map_location=torch.device('cpu'))
+ENCODER.load_state_dict(torch.load("encoderatt_epoch_35.pt", map_location=torch.device('cpu')))
 DECODER = DecoderAtt(HID_DIM, OUTPUT_DIM)
-#DECODER.load_state_dict(torch.load("hid512_decoder_att_epoch_20.pt"), map_location=torch.device('cpu'))
+DECODER.load_state_dict(torch.load("decoderatt_epoch_35.pt", map_location=torch.device('cpu')))
 
 
-def evaluate_final_model(encoder, decoder, sentence, vocab_source, vocab_target, disable=False):
+def evaluate_final_model(sentence, encoder, decoder, vocab_source, vocab_target, disable=False):
+    """Evaluate the translation model on a single sentence.
+    @param sentence (str)
+    @param encoder (EncoderAtt)
+    @param decoder (DecoderAtt)
+    @param vocab_source (Vocabulary)
+    @param vocab_target (Vocabulary)
+    @param disable (bool): disable the tqdm progress bar
+    """
     encoder.eval()
     decoder.eval()
     with torch.no_grad():
-        input_tensor = (
-            vocab_source.corpus_to_tensor([sentence], disable=disable)[0]
-            .view(1, -1)
-            .to(device)
-        )
+        input_tensor = vocab_source.corpus_to_tensor([sentence], disable=disable)[0].view(1, -1).to(device)
 
         encoder_outputs, encoder_hidden = encoder(input_tensor)
-        decoder_outputs, decoder_hidden, decoder_attn = decoder(
-            encoder_outputs, encoder_hidden
-        )
+        decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)
 
         _, topi = decoder_outputs.topk(1)
         decoded_ids = topi.squeeze()
@@ -340,20 +342,23 @@ def evaluate_final_model(encoder, decoder, sentence, vocab_source, vocab_target,
         decoded_words = []
         for idx in decoded_ids:
             if idx.item() == vocab_target.eos_id:
-                decoded_words.append("<eos>")
+                decoded_words.append('<eos>')
                 break
             decoded_words.append(vocab_target.id2word[idx.item()])
         return decoded_words, decoder_attn
 
-
-def my_translation(sentence):
+def translate_sentence(sentence):
     output_words, _ = evaluate_final_model(sentence, ENCODER, DECODER, VOCAB_SOURCE, VOCAB_TARGET, disable=True)
-    output_words = output_words.remove("<pad>")
-    output_words = output_words.remove("<unk>")
-    output_words = output_words.remove("<sos>")
-    output_words = output_words.remove("<eos>")
+    if "<pad>" in output_words:
+        output_words.remove("<pad>")
+    if "<unk>" in output_words:
+        output_words.remove("<unk>")
+    if "<sos>" in output_words:
+        output_words.remove("<sos>")
+    if "<eos>" in output_words:
+        output_words.remove("<eos>")
 
-    return ' '.join(output_words[1:-1]).capitalize()
+    return ' '.join(output_words).capitalize()
 
 
 def envit5_translation(text):
@@ -366,10 +371,10 @@ def envit5_translation(text):
 
 
 def translation(text):
+    output1 = translate_sentence(text)
+
     if not text.endswith(('.', '!', '?')):
         text = text + '.'
-    #output1 = my_translation(text)
-    output1 = "Something"
     output2 = envit5_translation(text)
 
     return (output1, output2)
@@ -401,4 +406,4 @@ if __name__ == "__main__":
         ]
     )
 
-    demo.launch()
+    demo.launch(share=True)
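Two fixes in this diff are worth calling out. First, the commit reorders `evaluate_final_model`'s parameters to match the existing call site (`sentence` first), which previously passed `sentence` where `encoder` was expected. Second, the previously commented-out loading code passed `map_location` to `load_state_dict()`, where it is not a valid argument; the new lines pass it to `torch.load()`, which is what actually remaps CUDA-trained tensors onto the CPU. A minimal sketch of the difference (`model` and `checkpoint.pt` are illustrative placeholders):

```python
import torch

# Broken (the old, commented-out pattern): load_state_dict() has no
# map_location parameter, so this raises a TypeError.
# model.load_state_dict(torch.load("checkpoint.pt"), map_location=torch.device("cpu"))

# Working (what this commit does): map_location belongs to torch.load(),
# so GPU-trained weights deserialize onto the CPU.
state_dict = torch.load("checkpoint.pt", map_location=torch.device("cpu"))
model.load_state_dict(state_dict)
```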
hid512_decoder_att_epoch_20.pt → decoderatt_epoch_35.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b13f49e00d60a51226db3a66e343ef3b73eccf06e0efe771cac417e1994a706
-size 40323250
+oid sha256:dee556d5e874646355b36d8d2fa94daf70d1da8684f0d95e4eb5edee4fe1b881
+size 43042290
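(The `.pt` entries here and in the encoder rename below are Git LFS pointer files, not the weights themselves: only the `oid sha256:` content hash and the byte `size` of the tracked checkpoint change. The decoder checkpoint grows from about 40.3 MB to about 43.0 MB.)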
hid512_encoder_att_epoch_20.pt → encoderatt_epoch_35.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec38b650930515f30086a04a16285c88430ceed352cfbd52cc27e34b4283221a
-size 16096464
+oid sha256:0879cb7c5a5359360b70195e61c9e91cd9f8caa0c6296f5924a7b9530f1350b0
+size 16437536
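With the renamed checkpoints in place, a quick smoke test of the new code path could look like the following (a sketch only: it assumes `app.py` imports cleanly with the `.pt` checkpoints and whatever vocabulary files it expects present, and the example sentences are illustrative):

```python
# Illustrative smoke test; demo.launch() stays behind the
# if __name__ == "__main__" guard, so importing app does not start the UI.
from app import translate_sentence, translation

print(translate_sentence("I hope you will be better"))

# translation() returns a pair: (our seq2seq model, VietAI envit5).
ours, envit5 = translation("Hello guys")
print(ours)
print(envit5)
```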
temp.ipynb DELETED
@@ -1,569 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "WARNING:tensorflow:From c:\\Users\\THU\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "import gradio as gr\n",
-    "from transformers import pipeline \n",
-    "import re\n",
-    "import pickle \n",
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "from torchtext.transforms import PadTransform\n",
-    "from torch.utils.data import Dataset, DataLoader\n",
-    "from torch.nn import functional as F\n",
-    "from tqdm import tqdm\n",
-    "from underthesea import word_tokenize, text_normalize"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Running on local URL: http://127.0.0.1:7864\n",
-      "\n",
-      "To create a public link, set `share=True` in `launch()`.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div><iframe src=\"http://127.0.0.1:7864/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": []
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import gradio as gr\n",
-    "\n",
-    "def translation(text):\n",
-    "    output1 = 1\n",
-    "    output2 = 2\n",
-    "    #output3 = finetune_BERT(text)\n",
-    "\n",
-    "    return (output1, output2)\n",
-    "\n",
-    "\n",
-    "\n",
-    "examples = [[\"Input: Hello guys\"], \n",
-    "            [\"Output: Xin chào các bạn\"]]\n",
-    "\n",
-    "demo = gr.Interface(\n",
-    "    theme = gr.themes.Base(),\n",
-    "    fn=translation,\n",
-    "    title=\"Co Gai Mo Duong\",\n",
-    "    description=\"\"\"\n",
-    "    ## Machine Translation: English to Vietnamese\n",
-    "    \"\"\",\n",
-    "    examples=examples,\n",
-    "    inputs=[\n",
-    "        gr.Textbox(\n",
-    "            lines=5, placeholder=\"Enter text\", label=\"Input\"\n",
-    "        )\n",
-    "    ],\n",
-    "    outputs=[\n",
-    "        gr.Textbox(\n",
-    "            \"text\", label=\"Our Machine Translation\"\n",
-    "        ),\n",
-    "        gr.Textbox(\n",
-    "            \"text\", label=\"VietAI Machine Translation\"\n",
-    "        )\n",
-    "    ]\n",
-    ")\n",
-    "\n",
-    "demo.launch(shared = True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Build Vocabulary\n",
-    "MAX_LENGTH = 30\n",
-    "#device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
-    "device = 'cpu'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class Vocabulary:\n",
-    "    \"\"\"The Vocabulary class is used to record words, which are used to convert\n",
-    "    text to numbers and vice versa.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, lang=\"vi\"):\n",
-    "        self.lang = lang\n",
-    "        self.word2id = dict()\n",
-    "        self.word2id[\"<sos>\"] = 0  # Start of Sentence Token\n",
-    "        self.word2id[\"<eos>\"] = 1  # End of Sentence Token\n",
-    "        self.word2id[\"<unk>\"] = 2  # Unknown Token\n",
-    "        self.word2id[\"<pad>\"] = 3  # Pad Token\n",
-    "        self.sos_id = self.word2id[\"<sos>\"]\n",
-    "        self.eos_id = self.word2id[\"<eos>\"]\n",
-    "        self.unk_id = self.word2id[\"<unk>\"]\n",
-    "        self.pad_id = self.word2id[\"<pad>\"]\n",
-    "        self.id2word = {v: k for k, v in self.word2id.items()}\n",
-    "        self.pad_transform = PadTransform(max_length = MAX_LENGTH, pad_value = self.pad_id)\n",
-    "\n",
-    "    def __getitem__(self, word):\n",
-    "        \"\"\"Return ID of word if existed else return ID unknown token\n",
-    "        @param word (str)\n",
-    "        \"\"\"\n",
-    "        return self.word2id.get(word, self.unk_id)\n",
-    "\n",
-    "    def __contains__(self, word):\n",
-    "        \"\"\"Return True if word in Vocabulary else return False\n",
-    "        @param word (str)\n",
-    "        \"\"\"\n",
-    "        return word in self.word2id\n",
-    "\n",
-    "    def __len__(self):\n",
-    "        \"\"\"\n",
-    "        Return number of tokens (include sos, eos, unk and pad tokens) in Vocabulary\n",
-    "        \"\"\"\n",
-    "        return len(self.word2id)\n",
-    "\n",
-    "    def lookup_tokens(self, word_indexes: list):\n",
-    "        \"\"\"Return the list of words by lookup by ID\n",
-    "        @param word_indexes (list(int))\n",
-    "        @return words (list(str))\n",
-    "        \"\"\"\n",
-    "        return [self.id2word[word_index] for word_index in word_indexes]\n",
-    "\n",
-    "    def add(self, word):\n",
-    "        \"\"\"Add word to vocabulary\n",
-    "        @param word (str)\n",
-    "        @return index (str): index of the word just added\n",
-    "        \"\"\"\n",
-    "        if word not in self:\n",
-    "            word_index = self.word2id[word] = len(self.word2id)\n",
-    "            self.id2word[word_index] = word\n",
-    "            return word_index\n",
-    "        else:\n",
-    "            return self[word]\n",
-    "\n",
-    "    def preprocessing_sent(self, sent, lang=\"en\"):\n",
-    "        \"\"\"Preprocess a sentence (depending on the language, English or Vietnamese)\"\"\"\n",
-    "\n",
-    "        if (lang == \"en\") or (lang == \"eng\") or (lang == \"english\"):\n",
-    "            # Remove unnecessary space\n",
-    "            sent = re.sub(\" +\", \" \", sent)\n",
-    "\n",
-    "            # Replace short form\n",
-    "            sent = re.sub(\"&apos;m \", \"am \", sent)\n",
-    "            # Don't know how to preprocess the possessive case\n",
-    "            sent = re.sub(\"&apos;s \", \"is \", sent)\n",
-    "            sent = re.sub(\"&apos;re \", \"are \", sent)\n",
-    "            sent = re.sub(\"&apos;ve \", \"have \", sent)\n",
-    "            sent = re.sub(\"&apos;ll \", \"will \", sent)\n",
-    "            sent = re.sub(\"&apos;d \", \"would \", sent)\n",
-    "\n",
-    "            sent = re.sub(\"aren &apos;t\", \"are not\", sent)\n",
-    "            sent = re.sub(\"isn &apos;t\", \"is not\", sent)\n",
-    "            sent = re.sub(\"don &apos;t\", \"do not\", sent)\n",
-    "            sent = re.sub(\"doesn &apos;t\", \"does not\", sent)\n",
-    "            sent = re.sub(\"wasn &apos;t\", \"was not\", sent)\n",
-    "            sent = re.sub(\"weren &apos;t\", \"were not\", sent)\n",
-    "            sent = re.sub(\"won &apos;t\", \"will not\", sent)\n",
-    "            sent = re.sub(\"can &apos;t\", \"can not\", sent)\n",
-    "            sent = re.sub(\"let &apos;s\", \"let us\", sent)\n",
-    "\n",
-    "        else:\n",
-    "            # Package underthesea.text_normalize supports normalizing Vietnamese\n",
-    "            sent = text_normalize(sent)\n",
-    "\n",
-    "        sent = re.sub(\"&apos;\", \"'\", sent)\n",
-    "        sent = re.sub(\"&quot;\", '\"', sent)\n",
-    "        sent = re.sub(\"&#91;\", \"[\", sent)\n",
-    "        sent = re.sub(\"&#93;\", \"]\", sent)\n",
-    "\n",
-    "        # Lowercase sentence and remove space at beginning and ending\n",
-    "        return sent.lower().strip()\n",
-    "\n",
-    "    def tokenize_corpus(self, corpus, disable=False):\n",
-    "        \"\"\"Split the documents of the corpus into words\n",
-    "        @param corpus (list(str)): list of documents\n",
-    "        @return tokenized_corpus (list(list(str))): list of words\n",
-    "        \"\"\"\n",
-    "        if not disable:\n",
-    "            print(\"Tokenize the corpus...\")\n",
-    "        tokenized_corpus = list()\n",
-    "        for document in tqdm(corpus, disable=disable):\n",
-    "            tokenized_document = [\"<sos>\"] + self.preprocessing_sent(document).split(\" \") + [\"<eos>\"]\n",
-    "            tokenized_corpus.append(tokenized_document)\n",
-    "        return tokenized_corpus\n",
-    "\n",
-    "    def corpus_to_tensor(self, corpus, is_tokenized=False, disable=False):\n",
-    "        \"\"\"Convert corpus to a list of indices tensor\n",
-    "        @param corpus (list(str) if is_tokenized==False else list(list(str)))\n",
-    "        @param is_tokenized (bool)\n",
-    "        @return indicies_corpus (list(tensor))\n",
-    "        \"\"\"\n",
-    "        if is_tokenized:\n",
-    "            tokenized_corpus = corpus\n",
-    "        else:\n",
-    "            tokenized_corpus = self.tokenize_corpus(corpus, disable=disable)\n",
-    "        indicies_corpus = list()\n",
-    "        for document in tqdm(tokenized_corpus, disable=disable):\n",
-    "            indicies_document = torch.tensor(\n",
-    "                list(map(lambda word: self[word], document)), dtype=torch.int64\n",
-    "            )\n",
-    "\n",
-    "            indicies_corpus.append(self.pad_transform(indicies_document))\n",
-    "\n",
-    "        return indicies_corpus\n",
-    "\n",
-    "    def tensor_to_corpus(self, tensor, disable=False):\n",
-    "        \"\"\"Convert list of indices tensor to a list of tokenized documents\n",
-    "        @param indicies_corpus (list(tensor))\n",
-    "        @return corpus (list(list(str)))\n",
-    "        \"\"\"\n",
-    "        corpus = list()\n",
-    "        for indicies in tqdm(tensor, disable=disable):\n",
-    "            document = list(map(lambda index: self.id2word[index.item()], indicies))\n",
-    "            corpus.append(document)\n",
-    "\n",
-    "        return corpus"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def create_input_emb_layer():\n",
-    "    num_embeddings, embedding_dim = 32998, 100\n",
-    "    emb_layer = nn.Embedding(num_embeddings, embedding_dim)\n",
-    "    emb_layer.weight.requires_grad = False\n",
-    "\n",
-    "    return emb_layer, embedding_dim\n",
-    "\n",
-    "def create_output_emb_layer():\n",
-    "    num_embeddings, embedding_dim = 15405, 100\n",
-    "    emb_layer = nn.Embedding(num_embeddings, embedding_dim)\n",
-    "    emb_layer.weight.requires_grad = False\n",
-    "\n",
-    "    return emb_layer, embedding_dim\n",
-    "\n",
-    "class EncoderRNN(nn.Module):\n",
-    "    def __init__(self, input_dim, hidden_dim, dropout = 0.2):\n",
-    "        super(EncoderRNN, self).__init__()\n",
-    "\n",
-    "        self.hidden_dim = hidden_dim\n",
-    "        #self.embedding = nn.Embedding(input_dim, hidden_dim)\n",
-    "        # Changed to the input embedding layer\n",
-    "        self.embedding, self.embedding_dim = create_input_emb_layer()\n",
-    "        self.gru = nn.GRU(self.embedding_dim, hidden_dim, batch_first=True)\n",
-    "        self.dropout = nn.Dropout(dropout)\n",
-    "\n",
-    "    def forward(self, src):\n",
-    "        embedded = self.dropout(self.embedding(src))\n",
-    "        output, hidden = self.gru(embedded)\n",
-    "        return output, hidden\n",
-    "\n",
-    "class BahdanauAttention(nn.Module):\n",
-    "    def __init__(self, hidden_size):\n",
-    "        super(BahdanauAttention, self).__init__()\n",
-    "        self.Wa = nn.Linear(hidden_size, hidden_size)\n",
-    "        self.Ua = nn.Linear(hidden_size, hidden_size)\n",
-    "        self.Va = nn.Linear(hidden_size, 1)\n",
-    "\n",
-    "    def forward(self, query, keys):\n",
-    "        scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))\n",
-    "        scores = scores.squeeze(2).unsqueeze(1)\n",
-    "\n",
-    "        weights = F.softmax(scores, dim=-1)\n",
-    "        context = torch.bmm(weights, keys)\n",
-    "\n",
-    "        return context, weights\n",
-    "\n",
-    "class AttnDecoderRNN(nn.Module):\n",
-    "    def __init__(self, hidden_size, output_size, dropout_p=0.1):\n",
-    "        super(AttnDecoderRNN, self).__init__()\n",
-    "        # self.embedding = nn.Embedding(output_size, hidden_size)\n",
-    "        # Changed to the output embedding layer\n",
-    "        self.embedding, self.embedding_dim = create_output_emb_layer()\n",
-    "        self.fc = nn.Linear(self.embedding_dim, hidden_size)\n",
-    "        self.attention = BahdanauAttention(hidden_size)\n",
-    "        self.gru = nn.GRU(2 * hidden_size, hidden_size, batch_first=True)\n",
-    "        self.out = nn.Linear(hidden_size, output_size)\n",
-    "        self.dropout = nn.Dropout(dropout_p)\n",
-    "\n",
-    "    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):\n",
-    "        batch_size = encoder_outputs.size(0)\n",
-    "        decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(0)\n",
-    "        decoder_hidden = encoder_hidden\n",
-    "        decoder_outputs = []\n",
-    "        attentions = []\n",
-    "\n",
-    "        for i in range(MAX_LENGTH):\n",
-    "            decoder_output, decoder_hidden, attn_weights = self.forward_step(\n",
-    "                decoder_input, decoder_hidden, encoder_outputs\n",
-    "            )\n",
-    "            decoder_outputs.append(decoder_output)\n",
-    "            attentions.append(attn_weights)\n",
-    "\n",
-    "            if target_tensor is not None:\n",
-    "                # Teacher forcing: Feed the target as the next input\n",
-    "                decoder_input = target_tensor[:, i].unsqueeze(1)  # Teacher forcing\n",
-    "            else:\n",
-    "                # Without teacher forcing: use its own predictions as the next input\n",
-    "                _, topi = decoder_output.topk(1)\n",
-    "                decoder_input = topi.squeeze(-1).detach()  # detach from history as input\n",
-    "\n",
-    "        decoder_outputs = torch.cat(decoder_outputs, dim=1)\n",
-    "        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)\n",
-    "        attentions = torch.cat(attentions, dim=1)\n",
-    "\n",
-    "        return decoder_outputs, decoder_hidden, attentions\n",
-    "\n",
-    "\n",
-    "    def forward_step(self, input, hidden, encoder_outputs):\n",
-    "        embedded = self.dropout(self.fc(self.embedding(input)))\n",
-    "\n",
-    "        query = hidden.permute(1, 0, 2)\n",
-    "        context, attn_weights = self.attention(query, encoder_outputs)\n",
-    "        input_gru = torch.cat((embedded, context), dim=2)\n",
-    "\n",
-    "        output, hidden = self.gru(input_gru, hidden)\n",
-    "        output = self.out(output)\n",
-    "\n",
-    "        return output, hidden, attn_weights"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<All keys matched successfully>"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "with open(\"vocab_source.pkl\", \"rb\") as file:\n",
-    "    VOCAB_SOURCE = pickle.load(file)\n",
-    "with open(\"vocab_target.pkl\", \"rb\") as file:\n",
-    "    VOCAB_TARGET = pickle.load(file)\n",
-    "\n",
-    "INPUT_DIM = len(VOCAB_SOURCE)\n",
-    "OUTPUT_DIM = len(VOCAB_TARGET)\n",
-    "HID_DIM = 512\n",
-    "\n",
-    "# Load our Model Translation\n",
-    "ENCODER = EncoderRNN(INPUT_DIM, HID_DIM)\n",
-    "ENCODER.load_state_dict(torch.load('encoder_att_epoch_16.pt'))\n",
-    "DECODER = AttnDecoderRNN(HID_DIM, OUTPUT_DIM)\n",
-    "DECODER.load_state_dict(torch.load('decoder_att_epoch_16.pt'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def evaluate(encoder, decoder, sentence, vocab_source, vocab_target, disable = False):\n",
-    "    encoder.eval()\n",
-    "    decoder.eval()\n",
-    "    with torch.no_grad():\n",
-    "        input_tensor = vocab_source.corpus_to_tensor([sentence], disable = disable)[0].view(1,-1).to(device)\n",
-    "\n",
-    "        encoder_outputs, encoder_hidden = encoder(input_tensor)\n",
-    "        decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)\n",
-    "\n",
-    "        _, topi = decoder_outputs.topk(1)\n",
-    "        decoded_ids = topi.squeeze()\n",
-    "\n",
-    "        decoded_words = []\n",
-    "        for idx in decoded_ids:\n",
-    "            if idx.item() == vocab_target.eos_id:\n",
-    "                decoded_words.append('<eos>')\n",
-    "                break\n",
-    "            decoded_words.append(vocab_target.id2word[idx.item()])\n",
-    "        return decoded_words, decoder_attn\n",
-    "\n",
-    "def my_translate_model(sentence):\n",
-    "    output_words, _ = evaluate(ENCODER, DECODER, sentence, VOCAB_SOURCE, VOCAB_TARGET, disable= True)\n",
-    "\n",
-    "    return ' '.join(output_words[1:-1]).capitalize() + '.'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 61,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'Tôi hy vọng các bạn sẽ có thể làm được giải pháp.'"
-      ]
-     },
-     "execution_count": 61,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "my_translate_model(\"I hope you will be better\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 60,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<All keys matched successfully>"
-      ]
-     },
-     "execution_count": 60,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ENCODER = EncoderRNN(INPUT_DIM, HID_DIM)\n",
-    "ENCODER.load_state_dict(torch.load('encoder_att_epoch_16.pt'))\n",
-    "DECODER = AttnDecoderRNN(HID_DIM, OUTPUT_DIM)\n",
-    "DECODER.load_state_dict(torch.load('decoder_att_epoch_16.pt'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "odict_keys(['embedding.weight', 'fc.weight', 'fc.bias', 'attention.Wa.weight', 'attention.Wa.bias', 'attention.Ua.weight', 'attention.Ua.bias', 'attention.Va.weight', 'attention.Va.bias', 'gru.weight_ih_l0', 'gru.weight_hh_l0', 'gru.bias_ih_l0', 'gru.bias_hh_l0', 'out.weight', 'out.bias'])"
-      ]
-     },
-     "execution_count": 48,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "torch.load('decoder_att_epoch_16.pt').keys()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<All keys matched successfully>"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "DECODER.load_state_dict(torch.load('decoder_att_epoch_16.pt'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<All keys matched successfully>"
-      ]
-     },
-     "execution_count": 57,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "DECODER = AttnDecoderRNN(HID_DIM, OUTPUT_DIM)\n",
-    "DECODER.load_state_dict(torch.load('decoder_att_epoch_16.pt'))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
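For reference, the attention module from the deleted notebook (additive scoring in the style of Bahdanau et al.) can still be exercised standalone; the following sketch reproduces it with illustrative dimensions and annotates the tensor shapes that the decoder's `forward_step` relies on:

```python
import torch
import torch.nn as nn
from torch.nn import functional as F

# Additive (Bahdanau) attention, as defined in the deleted temp.ipynb.
class BahdanauAttention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)  # projects the query
        self.Ua = nn.Linear(hidden_size, hidden_size)  # projects the keys
        self.Va = nn.Linear(hidden_size, 1)            # scores each source position

    def forward(self, query, keys):
        # query: (batch, 1, hidden), keys: (batch, seq_len, hidden)
        scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))  # (batch, seq_len, 1)
        scores = scores.squeeze(2).unsqueeze(1)                       # (batch, 1, seq_len)
        weights = F.softmax(scores, dim=-1)   # attention distribution over source tokens
        context = torch.bmm(weights, keys)    # (batch, 1, hidden)
        return context, weights

# Illustrative dimensions: batch of 2, 30 source tokens (MAX_LENGTH), hidden size 512.
attn = BahdanauAttention(hidden_size=512)
query = torch.randn(2, 1, 512)   # decoder hidden state, reshaped as in forward_step
keys = torch.randn(2, 30, 512)   # encoder outputs
context, weights = attn(query, keys)
print(context.shape, weights.shape)  # torch.Size([2, 1, 512]) torch.Size([2, 1, 30])
```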