Spaces:
Runtime error
Runtime error
Removed dataclasses from requirements
Browse files
- attention_replication.py +2 -2
- requirements.txt +147 -154
- transformer_replication.py +16 -13
attention_replication.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
# %%
|
2 |
import torch as t
|
3 |
import torch.nn as nn
|
4 |
-
from typing import Union
|
5 |
from fancy_einsum import einsum
|
6 |
-
from einops import repeat, rearrange
|
7 |
import numpy as np
|
8 |
#%%
|
9 |
def single_head_attention(Q: t.Tensor, K: t.Tensor, V: t.Tensor) -> t.Tensor:
|
|
|
1 |
# %%
|
2 |
import torch as t
|
3 |
import torch.nn as nn
|
4 |
+
from typing import Union
|
5 |
from fancy_einsum import einsum
|
6 |
+
from einops import repeat, rearrange
|
7 |
import numpy as np
|
8 |
#%%
|
9 |
def single_head_attention(Q: t.Tensor, K: t.Tensor, V: t.Tensor) -> t.Tensor:
|
requirements.txt
CHANGED
@@ -1,168 +1,161 @@
|
|
1 |
-
aiofiles
|
2 |
-
aiohttp
|
3 |
-
aiosignal
|
4 |
-
altair
|
5 |
-
anyio
|
6 |
-
argon2-cffi
|
7 |
-
argon2-cffi-bindings
|
8 |
-
asttokens
|
9 |
-
async-timeout
|
10 |
-
attrs
|
11 |
-
backcall
|
12 |
-
backports.functools-lru-cache
|
13 |
-
beautifulsoup4
|
14 |
-
bleach
|
15 |
-
brotlipy
|
16 |
certifi==2022.12.7
|
17 |
-
cffi
|
18 |
-
charset-normalizer
|
19 |
-
click
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
dill @ file:///home/conda/feedstock_root/build_artifacts/dill_1666603105584/work
|
31 |
-
einops @ file:///home/conda/feedstock_root/build_artifacts/einops_1670600230829/work
|
32 |
-
entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work
|
33 |
-
executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work
|
34 |
fancy-einsum==0.0.3
|
35 |
-
fastapi
|
36 |
-
fastjsonschema
|
37 |
-
ffmpy
|
38 |
-
filelock
|
39 |
-
Flask
|
40 |
-
flit_core
|
41 |
-
fonttools
|
42 |
-
frozenlist
|
43 |
-
fsspec
|
44 |
-
gradio
|
45 |
-
h11
|
46 |
-
h2
|
47 |
hpack==4.0.0
|
48 |
-
httpcore
|
49 |
-
httpx
|
50 |
-
huggingface-hub
|
51 |
-
hyperframe
|
52 |
-
idna
|
53 |
-
importlib-metadata
|
54 |
-
importlib-resources
|
55 |
-
ipykernel
|
56 |
-
ipython
|
57 |
ipython-genutils==0.2.0
|
58 |
-
ipywidgets
|
59 |
-
itsdangerous
|
60 |
-
jedi
|
61 |
-
Jinja2
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
jupyter-console
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
jupyterlab-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
matplotlib
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
multidict @ file:///home/conda/feedstock_root/build_artifacts/multidict_1672339403932/work
|
83 |
-
multiprocess @ file:///home/conda/feedstock_root/build_artifacts/multiprocess_1666932878376/work
|
84 |
munkres==1.1.4
|
85 |
-
nbclassic
|
86 |
-
nbclient
|
87 |
-
nbconvert
|
88 |
-
nbformat
|
89 |
-
nest-asyncio
|
90 |
-
notebook
|
91 |
-
notebook_shim
|
92 |
-
numpy
|
93 |
-
orjson
|
94 |
-
packaging
|
95 |
pandas==1.5.3
|
96 |
-
pandocfilters
|
97 |
-
parso
|
98 |
-
pexpect
|
99 |
-
pickleshare
|
100 |
-
Pillow
|
101 |
-
|
102 |
-
|
|
|
103 |
ply==3.11
|
104 |
-
prometheus-client
|
105 |
-
prompt-toolkit
|
106 |
-
psutil
|
107 |
-
ptyprocess
|
108 |
-
pure-eval
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1652235407899/work
|
117 |
PyQt5==5.15.7
|
118 |
PyQt5-sip==12.11.0
|
119 |
-
pyrsistent
|
120 |
-
PySocks
|
121 |
-
python-dateutil
|
122 |
-
python-json-logger
|
123 |
python-multipart==0.0.5
|
124 |
-
pytz
|
125 |
-
PyYAML
|
126 |
-
pyzmq
|
127 |
-
qtconsole
|
128 |
-
QtPy
|
129 |
-
regex
|
130 |
-
requests
|
131 |
-
|
132 |
-
|
133 |
-
rfc3986
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work
|
148 |
torch==1.13.1
|
149 |
torchaudio==0.13.1
|
150 |
torchvision==0.14.1
|
151 |
-
tornado
|
152 |
-
tqdm
|
153 |
-
traitlets
|
154 |
-
transformers
|
155 |
-
typing_extensions
|
156 |
-
uc-micro-py
|
157 |
-
unicodedata2
|
158 |
-
urllib3
|
159 |
-
uvicorn
|
160 |
-
wcwidth
|
161 |
webencodings==0.5.1
|
162 |
-
websocket-client
|
163 |
-
websockets
|
164 |
-
Werkzeug
|
165 |
-
|
166 |
-
|
167 |
-
yarl
|
168 |
-
zipp
|
|
|
1 |
+
aiofiles==23.1.0
|
2 |
+
aiohttp==3.8.4
|
3 |
+
aiosignal==1.3.1
|
4 |
+
altair==4.2.2
|
5 |
+
anyio==3.6.2
|
6 |
+
argon2-cffi==21.3.0
|
7 |
+
argon2-cffi-bindings==21.2.0
|
8 |
+
asttokens==2.2.1
|
9 |
+
async-timeout==4.0.2
|
10 |
+
attrs==22.2.0
|
11 |
+
backcall==0.2.0
|
12 |
+
backports.functools-lru-cache==1.6.4
|
13 |
+
beautifulsoup4==4.11.2
|
14 |
+
bleach==6.0.0
|
15 |
+
brotlipy==0.7.0
|
16 |
certifi==2022.12.7
|
17 |
+
cffi==1.15.1
|
18 |
+
charset-normalizer==2.1.1
|
19 |
+
click==8.1.3
|
20 |
+
comm==0.1.2
|
21 |
+
contourpy==1.0.7
|
22 |
+
cryptography==39.0.1
|
23 |
+
cycler==0.11.0
|
24 |
+
debugpy==1.6.6
|
25 |
+
decorator==5.1.1
|
26 |
+
defusedxml==0.7.1
|
27 |
+
einops==0.6.0
|
28 |
+
entrypoints==0.4
|
29 |
+
executing==1.2.0
|
|
|
|
|
|
|
|
|
30 |
fancy-einsum==0.0.3
|
31 |
+
fastapi==0.92.0
|
32 |
+
fastjsonschema==2.16.2
|
33 |
+
ffmpy==0.3.0
|
34 |
+
filelock==3.9.0
|
35 |
+
Flask==2.2.3
|
36 |
+
flit_core==3.8.0
|
37 |
+
fonttools==4.38.0
|
38 |
+
frozenlist==1.3.3
|
39 |
+
fsspec==2023.1.0
|
40 |
+
gradio==3.19.1
|
41 |
+
h11==0.14.0
|
42 |
+
h2==4.1.0
|
43 |
hpack==4.0.0
|
44 |
+
httpcore==0.16.3
|
45 |
+
httpx==0.23.3
|
46 |
+
huggingface-hub==0.12.1
|
47 |
+
hyperframe==6.0.1
|
48 |
+
idna==3.4
|
49 |
+
importlib-metadata==6.0.0
|
50 |
+
importlib-resources==5.12.0
|
51 |
+
ipykernel==6.21.2
|
52 |
+
ipython==8.10.0
|
53 |
ipython-genutils==0.2.0
|
54 |
+
ipywidgets==8.0.4
|
55 |
+
itsdangerous==2.1.2
|
56 |
+
jedi==0.18.2
|
57 |
+
Jinja2==3.1.2
|
58 |
+
jsonschema==4.17.3
|
59 |
+
jupyter==1.0.0
|
60 |
+
jupyter_client==8.0.3
|
61 |
+
jupyter-console==6.5.1
|
62 |
+
jupyter_core==5.2.0
|
63 |
+
jupyter-events==0.6.3
|
64 |
+
jupyter_server==2.3.0
|
65 |
+
jupyter_server_terminals==0.4.4
|
66 |
+
jupyterlab-pygments==0.2.2
|
67 |
+
jupyterlab-widgets==3.0.5
|
68 |
+
kiwisolver==1.4.4
|
69 |
+
linkify-it-py==2.0.0
|
70 |
+
markdown-it-py==2.1.0
|
71 |
+
MarkupSafe==2.1.2
|
72 |
+
matplotlib==3.7.0
|
73 |
+
matplotlib-inline==0.1.6
|
74 |
+
mdit-py-plugins==0.3.3
|
75 |
+
mdurl==0.1.0
|
76 |
+
mistune==2.0.5
|
77 |
+
multidict==6.0.4
|
|
|
|
|
78 |
munkres==1.1.4
|
79 |
+
nbclassic==0.5.2
|
80 |
+
nbclient==0.7.2
|
81 |
+
nbconvert==7.2.9
|
82 |
+
nbformat==5.7.3
|
83 |
+
nest-asyncio==1.5.6
|
84 |
+
notebook==6.5.2
|
85 |
+
notebook_shim==0.2.2
|
86 |
+
numpy==1.24.2
|
87 |
+
orjson==3.8.5
|
88 |
+
packaging==23.0
|
89 |
pandas==1.5.3
|
90 |
+
pandocfilters==1.5.0
|
91 |
+
parso==0.8.3
|
92 |
+
pexpect==4.8.0
|
93 |
+
pickleshare==0.7.5
|
94 |
+
Pillow==9.4.0
|
95 |
+
pip==23.0.1
|
96 |
+
pkgutil_resolve_name==1.3.10
|
97 |
+
platformdirs==3.0.0
|
98 |
ply==3.11
|
99 |
+
prometheus-client==0.16.0
|
100 |
+
prompt-toolkit==3.0.36
|
101 |
+
psutil==5.9.4
|
102 |
+
ptyprocess==0.7.0
|
103 |
+
pure-eval==0.2.2
|
104 |
+
pycparser==2.21
|
105 |
+
pycryptodome==3.16.0
|
106 |
+
pydantic==1.10.5
|
107 |
+
pydub==0.25.1
|
108 |
+
Pygments==2.14.0
|
109 |
+
pyOpenSSL==23.0.0
|
110 |
+
pyparsing==3.0.9
|
|
|
111 |
PyQt5==5.15.7
|
112 |
PyQt5-sip==12.11.0
|
113 |
+
pyrsistent==0.19.3
|
114 |
+
PySocks==1.7.1
|
115 |
+
python-dateutil==2.8.2
|
116 |
+
python-json-logger==2.0.6
|
117 |
python-multipart==0.0.5
|
118 |
+
pytz==2022.7.1
|
119 |
+
PyYAML==6.0
|
120 |
+
pyzmq==25.0.0
|
121 |
+
qtconsole==5.4.0
|
122 |
+
QtPy==2.3.0
|
123 |
+
regex==2022.10.31
|
124 |
+
requests==2.28.2
|
125 |
+
rfc3339-validator==0.1.4
|
126 |
+
rfc3986==1.5.0
|
127 |
+
rfc3986-validator==0.1.1
|
128 |
+
Send2Trash==1.8.0
|
129 |
+
setuptools==67.3.2
|
130 |
+
sip==6.7.7
|
131 |
+
six==1.16.0
|
132 |
+
sniffio==1.3.0
|
133 |
+
soupsieve==2.3.2.post1
|
134 |
+
stack-data==0.6.2
|
135 |
+
starlette==0.25.0
|
136 |
+
terminado==0.17.1
|
137 |
+
tinycss2==1.2.1
|
138 |
+
tokenizers==0.13.2
|
139 |
+
toml==0.10.2
|
140 |
+
toolz==0.12.0
|
|
|
141 |
torch==1.13.1
|
142 |
torchaudio==0.13.1
|
143 |
torchvision==0.14.1
|
144 |
+
tornado==6.2
|
145 |
+
tqdm==4.64.1
|
146 |
+
traitlets==5.9.0
|
147 |
+
transformers==4.26.1
|
148 |
+
typing_extensions==4.4.0
|
149 |
+
uc-micro-py==1.0.1
|
150 |
+
unicodedata2==15.0.0
|
151 |
+
urllib3==1.26.14
|
152 |
+
uvicorn==0.20.0
|
153 |
+
wcwidth==0.2.6
|
154 |
webencodings==0.5.1
|
155 |
+
websocket-client==1.5.1
|
156 |
+
websockets==10.4
|
157 |
+
Werkzeug==2.2.3
|
158 |
+
wheel==0.38.4
|
159 |
+
widgetsnbextension==4.0.5
|
160 |
+
yarl==1.8.2
|
161 |
+
zipp==3.14.0
|
transformer_replication.py
CHANGED
@@ -5,13 +5,7 @@ import torch.nn as nn
|
|
5 |
from typing import Union, List
|
6 |
from fancy_einsum import einsum
|
7 |
import torch as t
|
8 |
-
|
9 |
-
from torchvision import datasets, transforms
|
10 |
-
from torch.utils.data import DataLoader
|
11 |
-
from typing import Union, Optional, Callable, Tuple
|
12 |
-
import numpy as np
|
13 |
-
from einops import rearrange
|
14 |
-
import time
|
15 |
# %%
|
16 |
tokenizer = transformers.AutoTokenizer.from_pretrained("gpt2")
|
17 |
if __name__ == "__main__":
|
@@ -90,9 +84,6 @@ class LayerNorm(nn.Module):
|
|
90 |
pass
|
91 |
|
92 |
# %%
|
93 |
-
from dataclasses import dataclass
|
94 |
-
|
95 |
-
@dataclass(frozen=True)
|
96 |
class TransformerConfig:
|
97 |
'''Constants used throughout your decoder-only transformer model.'''
|
98 |
|
@@ -101,10 +92,22 @@ class TransformerConfig:
|
|
101 |
vocab_size: int
|
102 |
hidden_size: int
|
103 |
max_seq_len: int
|
104 |
-
dropout: float
|
105 |
-
layer_norm_epsilon: float
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
# %%
|
107 |
-
|
108 |
|
109 |
class BertMLP(nn.Module):
|
110 |
def __init__(self, config: TransformerConfig):
|
|
|
5 |
from typing import Union, List
|
6 |
from fancy_einsum import einsum
|
7 |
import torch as t
|
8 |
+
import attention_replication
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# %%
|
10 |
tokenizer = transformers.AutoTokenizer.from_pretrained("gpt2")
|
11 |
if __name__ == "__main__":
|
|
|
84 |
pass
|
85 |
|
86 |
# %%
|
|
|
|
|
|
|
87 |
class TransformerConfig:
|
88 |
'''Constants used throughout your decoder-only transformer model.'''
|
89 |
|
|
|
92 |
vocab_size: int
|
93 |
hidden_size: int
|
94 |
max_seq_len: int
|
95 |
+
dropout: float
|
96 |
+
layer_norm_epsilon: float
|
97 |
+
|
98 |
+
def __init__(
|
99 |
+
self, num_layers, num_heads, vocab_size, hidden_size, max_seq_len,
|
100 |
+
dropout=0.1, layer_norm_epsilon=1e-5,
|
101 |
+
) -> None:
|
102 |
+
self.num_layers = num_layers
|
103 |
+
self.num_heads = num_heads
|
104 |
+
self.vocab_size = vocab_size
|
105 |
+
self.hidden_size = hidden_size
|
106 |
+
self.max_seq_len = max_seq_len
|
107 |
+
self.dropout = dropout
|
108 |
+
self.layer_norm_epsilon = layer_norm_epsilon
|
109 |
# %%
|
110 |
+
|
111 |
|
112 |
class BertMLP(nn.Module):
|
113 |
def __init__(self, config: TransformerConfig):
|