Delete gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/config.json +0 -27
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/ae.pt +0 -3
- gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/config.json +0 -27
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22
|
3 |
-
size 1208232760
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.025,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "0"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e2719944aa66b4b8a560631fcad5c75b33841c9d7f912952964f5d9cb99b5fec
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.025,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "195"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3fc8256b3a3256b6e5418b9d91b5734339cefeaae85e42c82d4b52008a38b27f
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.025,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "1953"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:03dee9a5170fbb2615fb26cd612921da7f0418ceb4ba8bdbac70a2ae5d0e9860
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.025,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "19531"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:43df85ceb189027f81fe27c41aa57fb75d22bc81dbafd20e64147daf97035cdf
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.025,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "617"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5c732f580ab880879cdde780baee7d26987f9867b7d7f23bca02e8907769fa58
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.025,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "6176"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b4ae0849c49f1767bb915700aae9995dc9bcff76ff3c63dd820df7ef966c28a6
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.025,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "61763"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22
|
3 |
-
size 1208232760
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.035,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "0"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:94b2d3c50422f04a443b32a439c897098fbbc36aa205194d42848cd016a3b8f7
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.035,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "195"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:96c629f309743528eae9100f7ad4de436f74372ec3ad4b9df2b8b42766e73928
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.035,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "1953"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1d80023e2d3c4825e43bdf2863afe3be88422263ffbca3e22cf6f1c861197be1
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.035,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "19531"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c2154ad3037938d4584f8f6b38773f58fe3a28a1d95ca64122baa8b7ca6fe681
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.035,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "617"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0fc3a384661f113926ac9709e5a0279eb5818f7c95d360459eea20e7783c1929
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.035,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "6176"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f2f5f81d086fdbb0d0c970b1a8592d474224cc8a10771351045fae65e5a11406
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.035,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "61763"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22
|
3 |
-
size 1208232760
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.04,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "0"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5c82ad7f17773d76485798e5bbfd41d1854d5d28cdce8015e1b537cf671197d1
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.04,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "195"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6cfe58ba4ea5e4aa094a4d7f41bb1d067965ba57ed9c342c81335f62b8863ba8
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.04,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "1953"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:64f7c9c06f1e41050ad18309eaf59b11e5d60b9cad6cf323353f67cbf94c6893
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.04,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "19531"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:375a73377c3ab24b5a2a3ea113530d00b47d856c236430df5841022e8680e79d
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.04,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "617"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:466756eef1a6aff1f13301308339b0e57502a85914d51a874f95531ef442020c
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.04,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "6176"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a75d62beba010099b105df801d0277ea97ee73b78aa42606ad714a21f2c8fa08
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.04,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "61763"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22
|
3 |
-
size 1208232760
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.05,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "0"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:7fd81969d804d3b87a344c72d08d26dcb8a65c25dc87d818f875d4e2ac09f347
|
3 |
-
size 1208232776
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.05,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "195"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:7dd1e2ddd31bf59234b04214e8231f913688d48efb36667e84dc5bcee4e39285
|
3 |
-
size 1208232848
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.05,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "1953"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:350beb4c71c62ff10017f1024b8888cffb47e03b4a9773357d800de783ae719e
|
3 |
-
size 1208233048
|
|
|
|
|
|
|
|
gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/config.json
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"dict_class": "AutoEncoder",
|
4 |
-
"trainer_class": "StandardTrainer",
|
5 |
-
"activation_dim": 2304,
|
6 |
-
"dict_size": 65536,
|
7 |
-
"lr": 0.0003,
|
8 |
-
"l1_penalty": 0.05,
|
9 |
-
"warmup_steps": 1000,
|
10 |
-
"resample_steps": null,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 5,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5",
|
15 |
-
"submodule_name": "resid_post_layer_5",
|
16 |
-
"steps": "19531"
|
17 |
-
},
|
18 |
-
"buffer": {
|
19 |
-
"d_submodule": 2304,
|
20 |
-
"io": "out",
|
21 |
-
"n_ctxs": 2048,
|
22 |
-
"ctx_len": 128,
|
23 |
-
"refresh_batch_size": 32,
|
24 |
-
"out_batch_size": 2048,
|
25 |
-
"device": "cuda:0"
|
26 |
-
}
|
27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|