Upload folder using huggingface_hub
Browse files- trainer_0/ae.pt +3 -0
- trainer_0/checkpoints/ae_0.pt +3 -0
- trainer_0/checkpoints/ae_2048.pt +3 -0
- trainer_0/checkpoints/ae_4096.pt +3 -0
- trainer_0/checkpoints/ae_6144.pt +3 -0
- trainer_0/checkpoints/ae_8192.pt +3 -0
- trainer_0/config.json +26 -0
trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d125ff7382fd752d120ea23ae9998f4f67d28fdd80e39fb67166ad38c42f8ddf
|
3 |
+
size 37778216
|
trainer_0/checkpoints/ae_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0437026439ef0e23bd32aaa7628425bb19a5b1ae38588a345259f8f2920a0a4d
|
3 |
+
size 37778232
|
trainer_0/checkpoints/ae_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3d6c05bb4a658d67ae413b57a9abdfbe19bf149617d3846b731d27da63b0172
|
3 |
+
size 37778320
|
trainer_0/checkpoints/ae_4096.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57915ac61dcbd86d11f2147bffed378d8964ebec3ffab0e23df9c8a820934663
|
3 |
+
size 37778320
|
trainer_0/checkpoints/ae_6144.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a7ed17a36dbe840b7e50cd677cbf2dcc51cd65a518304229d15c8240f40b1bb
|
3 |
+
size 37778320
|
trainer_0/checkpoints/ae_8192.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c63dbcda73ff7cebac799c34faf0a3b565d8193759074d539e1ee55583b45317
|
3 |
+
size 37778320
|
trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.00032659863237109043,
|
6 |
+
"steps": 30000,
|
7 |
+
"seed": null,
|
8 |
+
"activation_dim": 768,
|
9 |
+
"dict_size": 6144,
|
10 |
+
"k": 30,
|
11 |
+
"device": "cuda",
|
12 |
+
"layer": "0",
|
13 |
+
"lm_name": "TinyModel_2L_3E",
|
14 |
+
"wandb_name": "AutoEncoderTopK",
|
15 |
+
"submodule_name": null
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 768,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000.0,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 512,
|
23 |
+
"out_batch_size": 8192,
|
24 |
+
"device": "cuda"
|
25 |
+
}
|
26 |
+
}
|