{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import import_ipynb\n",
"from Encoder import Encoder, EncoderBlock, MultiHeadAttentionBlock, FeedForwardBlock, InputEmbeddingsLayer, PositionalEncodingLayer\n",
"from Decoder import Decoder, DecoderBlock, MultiHeadAttentionBlock, FeedForwardBlock, InputEmbeddingsLayer, PositionalEncodingLayer\n",
"\n",
"import torch\n",
"import torch.nn as nn \n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"class LinearLayer(nn.Module):\n",
"\n",
" def __init__(self, d_model: int, vocab_size: int) -> None:\n",
" super().__init__()\n",
" self.Linear = nn.Linear(d_model, vocab_size)\n",
"\n",
" def forward(self, x):\n",
" return self.Linear(x)"
]
},
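{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick shape check for `LinearLayer` (a minimal sketch; the `d_model` and `vocab_size` values here are illustrative, not defaults from the model):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"projection = LinearLayer(d_model=512, vocab_size=1000)\n",
"dummy = torch.randn(2, 7, 512)      # (batch, seq_len, d_model)\n",
"print(projection(dummy).shape)      # torch.Size([2, 7, 1000])\n"
]
},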
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"class TransformerBlock(nn.Module):\n",
"\n",
" def __init__(self, encoder: Encoder, decoder: Decoder, source_embedding: InputEmbeddingsLayer, target_embedding: InputEmbeddingsLayer, source_position: PositionalEncodingLayer, target_position: PositionalEncodingLayer, Linear: LinearLayer) -> None:\n",
" super().__init__()\n",
" self.encoder = encoder \n",
" self.decoder = decoder \n",
" self.source_embedding = source_embedding\n",
" self.target_embedding = target_embedding\n",
" self.source_position = source_position\n",
" self.target_position = target_position\n",
" self.Linear = Linear\n",
"\n",
" def encode(self, source_language, source_mask):\n",
" source_language = self.source_embedding(source_language)\n",
" source_language = self.source_position(source_language)\n",
" return self.encoder(source_language, source_mask)\n",
"\n",
" def decode(self, Encoder_output, source_mask, target_language, target_mask):\n",
" target_language = self.target_embedding(target_language)\n",
" target_language = self.target_position(target_language)\n",
" return self.decoder(target_language, Encoder_output, source_mask, target_mask)\n",
"\n",
" def linear(self, x):\n",
" return self.Linear(x)\n",
" \n"
]
},
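{
"cell_type": "markdown",
"metadata": {},
"source": [
"The staged `encode`/`decode`/`linear` API lets the encoder output be computed once and reused at every decoding step. Below is a minimal greedy-decoding sketch built on that API; the causal-mask shape is an assumption that must match whatever `MultiHeadAttentionBlock` expects, and `sos_id`/`eos_id` are hypothetical special-token ids:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def greedy_decode(model: TransformerBlock, source_ids, source_mask, sos_id: int, eos_id: int, max_length: int):\n",
"    # Encode the source sentence once and reuse the result for every step.\n",
"    encoder_output = model.encode(source_ids, source_mask)\n",
"    target_ids = torch.full((source_ids.size(0), 1), sos_id, dtype=torch.long)\n",
"    for _ in range(max_length - 1):\n",
"        seq_len = target_ids.size(1)\n",
"        # Causal mask: each position may only attend to earlier positions\n",
"        # (assumed broadcastable over batch and heads).\n",
"        causal_mask = torch.tril(torch.ones(1, seq_len, seq_len)).int()\n",
"        decoder_output = model.decode(encoder_output, source_mask, target_ids, causal_mask)\n",
"        # Project only the last position and pick the most likely token.\n",
"        next_token = model.linear(decoder_output[:, -1]).argmax(dim=-1, keepdim=True)\n",
"        target_ids = torch.cat([target_ids, next_token], dim=1)\n",
"        if (next_token == eos_id).all():\n",
"            break\n",
"    return target_ids\n"
]
},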
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def Transformer_Model(source_vocab_size: int, target_vocab_size: int, source_sequence_length: int, target_sequence_length: int, d_model: int = 512, Layers: int = 6, heads: int = 8, dropout: float = 0.1, d_ff: int = 2048)->TransformerBlock:\n",
"\n",
" source_embedding = InputEmbeddingsLayer(d_model, source_vocab_size)\n",
" target_embedding = InputEmbeddingsLayer(d_model, target_vocab_size)\n",
"\n",
" source_position = PositionalEncodingLayer(d_model, source_sequence_length, dropout)\n",
" target_position = PositionalEncodingLayer(d_model, target_sequence_length, dropout)\n",
"\n",
" EncoderBlocks = []\n",
" for _ in range(Layers):\n",
" encoder_self_attention_block = MultiHeadAttentionBlock(d_model, heads, dropout)\n",
" encoder_feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)\n",
" encoder_block = EncoderBlock(encoder_self_attention_block, encoder_feed_forward_block, dropout)\n",
" EncoderBlocks.append(encoder_block)\n",
"\n",
" DecoderBlocks = []\n",
" for _ in range(Layers):\n",
" decoder_self_attention_block = MultiHeadAttentionBlock(d_model, heads, dropout)\n",
" decoder_cross_attention_block = MultiHeadAttentionBlock(d_model, heads, dropout)\n",
" decoder_feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)\n",
" decoder_block = DecoderBlock(decoder_self_attention_block, decoder_cross_attention_block, decoder_feed_forward_block, dropout)\n",
" DecoderBlocks.append(decoder_block)\n",
"\n",
" encoder = Encoder(nn.ModuleList(EncoderBlocks))\n",
" decoder = Decoder(nn.ModuleList(DecoderBlocks))\n",
"\n",
" linear = LinearLayer(d_model, target_vocab_size)\n",
"\n",
" Transformer = TransformerBlock(encoder, decoder, source_embedding, target_embedding, source_position, target_position, linear)\n",
" \n",
" for t in Transformer.parameters():\n",
" if t.dim() > 1:\n",
" nn.init.xavier_uniform(t)\n",
"\n",
" return Transformer\n",
" "
]
},
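{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small smoke test of the assembled model (a sketch under assumptions: the vocabulary sizes, sequence lengths, and reduced hyperparameters are arbitrary, and the broadcastable mask shapes assume the usual masking convention inside the attention blocks):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = Transformer_Model(source_vocab_size=100, target_vocab_size=120,\n",
"                          source_sequence_length=16, target_sequence_length=16,\n",
"                          d_model=64, Layers=2, heads=4, d_ff=128)\n",
"\n",
"source_ids = torch.randint(0, 100, (2, 16))             # (batch, source_seq_len)\n",
"target_ids = torch.randint(0, 120, (2, 16))             # (batch, target_seq_len)\n",
"source_mask = torch.ones(2, 1, 1, 16).int()             # nothing padded out\n",
"target_mask = torch.tril(torch.ones(1, 16, 16)).int()   # causal mask\n",
"\n",
"encoder_output = model.encode(source_ids, source_mask)\n",
"decoder_output = model.decode(encoder_output, source_mask, target_ids, target_mask)\n",
"logits = model.linear(decoder_output)\n",
"print(logits.shape)  # expected: torch.Size([2, 16, 120])\n"
]
},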
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "5f594f1fbc6ec12c92a2efee092a20dcfd0697dc036fc348ba81f2fc261c5e29"
},
"kernelspec": {
"display_name": "Python 3.11.5 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}