Delete gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/config.json +0 -26
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/ae.pt +0 -3
- gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/config.json +0 -26
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391
|
3 |
-
size 1208232760
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "0",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:2936474d9c934443b3d5369b94bdc45053a7f4b2b5ba89cb53fc2651dccce915
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "308",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3e1e3d667af479cce5692ac6aab166258cacf1ed4d480fbd80eeadb32127315d
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "3088",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1ae2e3b8be3f90444c4496646fb6aa604f0c2c7809b62a0d8608c47d1e697a2c
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "30881",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:98396469af5a09409acad191e2fb8637e647029ad7fe6764ea11b847b1471e6d
|
3 |
-
size 1208232768
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "97",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:72247aad0086f6772c263e029965661f81a79ea11caae94727f16d95c07d313c
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "976",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8de72270cab9c46aa2cdb3c51a7e986bab6f43b2e5512a1d77f4c43613e20e4e
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "9765",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391
|
3 |
-
size 1208232760
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "0",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:610e1d15ff4fcb05fe0db164879b316f1205732e25850e145091c9cb48bfa09c
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "308",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9d9d3b8aef0f52860267de45ac12977fe86826f35cdb6b87a3829673a37c36cb
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "3088",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8190d85699dad6aaeef16cad0731e96b63676f479e8f4fe151062e2eece8e4aa
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "30881",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8e5458365646730ae06d8e514b5c728bd501dab0ba227af64729218b2c76dd9d
|
3 |
-
size 1208232768
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "97",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e4a2260a96e754b304bdbfda0175f9dde53527e2c3cc76023e1f21eadc6889fd
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "976",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c4fafb152c01023057a05eb8d0e2f2b50263ce78eb201cb1269ce0dbbc5c7585
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "9765",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391
|
3 |
-
size 1208232760
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "0",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:819dec7bf756472ec9ad3ce387cd15ff50e18491d98df4775c70db889f84745e
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "308",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:4956f6f43595e7dc95488bf8f4eac64e76b2b22f50d0a488bcc5088a229747c8
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "3088",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:2b8ca264d34fc9c813271d4740f8354d920adc3a8bb2575e010e191f6d652af8
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "30881",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:435d42cfbfe4fb8260ba563ecedb62184ff808afd8f7070a8c23b2b1f503838e
|
3 |
-
size 1208232768
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "97",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f8af3df52dba4e0b8ae0f464cf3c7c7c2fc55b12e7e1ebd5e325e49f5d487b69
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "976",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:025b7d7aa2e0085f3c34e1cff16b900a16cb18bf6e8d8e7984dfead442d22ca2
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "9765",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391
|
3 |
-
size 1208232760
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "0",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 160,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f1172e3e921375af16476b723059354adca19d37dadbd0e5c022ba9cc633bec2
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "308",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 160,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:12b637cb1114124801da785a1af70fd65bce57142a6d7bc824308e49caba8a64
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "3088",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 160,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:821b99c90698176c46a4ad714484fbe8f816ed7deb4cd1b0fde7b2791d5f0a6e
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001,
|
6 |
-
"steps": "30881",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 65536,
|
10 |
-
"k": 160,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2048,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 24,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|