diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b1b582e0d3ef63d7150f5952a6298bf3359f379
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:731f9e5cdb09067bf8e15f162aff6438e7e79db5a9987c735e4b28b97fe7d2b7
+size 100733974
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..458059e795deab45bd7cfcd514c55dcfa55dd5e3
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
@@ -0,0 +1,32 @@
+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 20,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_0",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..7336eb9ae2ab9f349f07814ea62bdc61a5139f60
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 7.165702819824219, "l1_loss": 52.398841048731946, "l0": 19.910211158521246, "frac_variance_explained": 0.9256864298473705, "cossim": 0.9520920298316262, "l2_ratio": 0.9514842954548922, "relative_reconstruction_bias": 0.9993569128441088, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.9917597409450645, "loss_zero": 12.187079458525687, "frac_recovered": 0.9568525624997688, "frac_alive": 0.933837890625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4164de90d990579350e4bbc4849438441a9d8e78
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da5a8f71335eba31afa1eca0d8b6f01790dec259246cbd6bc7225951a5486d9a
+size 100733974
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c0f29dec665105aacf5042ad142c273d56be5252
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
@@ -0,0 +1,32 @@
+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 40,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed4ede0ea39346ab3f72e328f0590fb391f35bb1
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.358675190896699, "l1_loss": 67.62095688328598, "l0": 39.82553227742513, "frac_variance_explained": 0.9409584890712391, "cossim": 0.9623859289920691, "l2_ratio": 0.9620392033548066, "relative_reconstruction_bias": 0.9997165492086699, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.852238994656187, "loss_zero": 12.187079458525687, "frac_recovered": 0.9724247419472897, "frac_alive": 0.92108154296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c499093e7c3a1514a99a6daa5b7905b8295a5ec7
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe65f3e4ad7fe582508b307625716ff197a04a3d371834c7222c023169597fbd
+size 100733974
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8868e9ce225ee28147affb75065695ae8dd68c89
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
@@ -0,0 +1,32 @@
+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 80,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..45871e1c23861f69029a835c6e75ee2084f4ef3e
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 5.535941615249172, "l1_loss": 90.0740072076971, "l0": 79.61385576652758, "frac_variance_explained": 0.9549660375623992, "cossim": 0.9716276183272853, "l2_ratio": 0.9715044552629645, "relative_reconstruction_bias": 1.0009427467981975, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.7588755361961597, "loss_zero": 12.187079458525687, "frac_recovered": 0.9828645659215522, "frac_alive": 0.873291015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1702746a9e4c7a3aba8fdc5ad707b70599f69848
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62d31b3f692684c53b5c5e8ff60b739baa9a0daf57abe707da33701733063e7d
+size 100733974
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..65ef6069ff892a6817c8715be256c3188038dce4
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
@@ -0,0 +1,32 @@
+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 160,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..046c6f8c4fc3a003e841b796a81df7b430f9d111
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.596404407963608, "l1_loss": 153.62816642992425, "l0": 159.1172462232185, "frac_variance_explained": 0.9690298311638109, "cossim": 0.98062437953371, "l2_ratio": 0.9803878791404493, "relative_reconstruction_bias": 1.0007607864611077, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6916814464511294, "loss_zero": 12.187079458525687, "frac_recovered": 0.9904219201116851, "frac_alive": 0.7470703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee57c2cd2abc3017d37bb942f8e419a2be8052f6
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff290fc3b27d35eee2431b923928da0a5530eaf02380cc605a3f04aa3bc61287
+size 100733974
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..67bcb01401d37d5940bd75fdcf4a6ebccef68001
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
@@ -0,0 +1,32 @@
+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 320,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..6300f72d7d835cf405df3350906851605fddbab6
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.1190587029312598, "l1_loss": 295.7740866921165, "l0": 318.5566424745502, "frac_variance_explained": 0.9860216288855581, "cossim": 0.9912758794697848, "l2_ratio": 0.9912153554685188, "relative_reconstruction_bias": 1.0004844918395535, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.63657668503848, "loss_zero": 12.187079458525687, "frac_recovered": 0.9965930906209078, "frac_alive": 0.32232666015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58b8fd913153c163606300a87a138c25e290a716
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:405bcdf303076eb4e67b6dcd34a2c041278a2574400fd19e2deb02eb9df12560
+size 100733974
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6e95d277f1f104399df23bc91d19184e592bb2fd
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
@@ -0,0 +1,32 @@
+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 640,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_5",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}
\ No newline at end of file
diff --git a/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1329723c0a7ba346f313324a21d606bc21559fbf
--- /dev/null
+++ b/BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 1.5527055624759558, "l1_loss": 626.5316938920455, "l0": 639.1975338097775, "frac_variance_explained": 0.9963156291932771, "cossim": 0.9977991869955352, "l2_ratio": 0.9976590293826479, "relative_reconstruction_bias": 1.0002403476021506, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6137762756058662, "loss_zero": 12.187079458525687, "frac_recovered": 0.9991628256711093, "frac_alive": 0.06146240234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..123c05984e9dfc2cdf0cad206cfd598cf308178f
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50b23e31ad3ee499518d7bd643be829a3ee7b86b236059a0146799d475bb1c6c
+size 100865046
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f9ee88b7030431880eb9cadfa2ba6a75b0ebd41
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
@@ -0,0 +1,28 @@
+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.012,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_12",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..b8f4a2f1673082ea8b7e0867cfd3e29c77b56d88
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 2.449277211384601, "l1_loss": 233.9408302766731, "l0": 483.06496126105986, "frac_variance_explained": 0.9913476149001753, "cossim": 0.9946498181446489, "l2_ratio": 0.9939263398388782, "relative_reconstruction_bias": 0.9993272940796541, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6073691378156822, "loss_zero": 12.979128625019488, "frac_recovered": 0.9982530004288777, "frac_alive": 0.59857177734375, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..300b95739c9b0588db013f225064e858621f80b5
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c989b27ea8ba899f086eb1e4bbf13a21a3e8ebc86900fb5d588fd7f804b814f4
+size 100865046
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2992e11e19dc426262b633e53173e8d1a444bee0
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
@@ -0,0 +1,28 @@
+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.018,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_13",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..14f5bb189c55ba39eed9516bf12da133da342655
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.718252821140979, "l1_loss": 174.70882599612315, "l0": 357.33407280244023, "frac_variance_explained": 0.9797811321465366, "cossim": 0.9874855103981064, "l2_ratio": 0.9873908984373851, "relative_reconstruction_bias": 1.0000614502343788, "loss_original": 2.591329812285412, "loss_reconstructed": 2.638556733906987, "loss_zero": 12.979128625019488, "frac_recovered": 0.9948906988264566, "frac_alive": 0.55438232421875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..18d6ce617e1c78fb0506102e65a59fa59ec8fe81
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0818ec859a15998633c73ad5dce110641f60f0b7b999aafd459d8bcaafa407d
+size 100865046
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..16f02cbc3a6951bdfe75737befa9f914a64a83bb
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
@@ -0,0 +1,28 @@
+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.024,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_14",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e9e7a4665b3ee8fd24ab14b595bf41606105c8d4
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.538920862128935, "l1_loss": 141.84569384103798, "l0": 227.8396312299981, "frac_variance_explained": 0.9699712948626783, "cossim": 0.9812705624534424, "l2_ratio": 0.98167203241084, "relative_reconstruction_bias": 1.000891358737486, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6714729882148376, "loss_zero": 12.979128625019488, "frac_recovered": 0.9913170700331768, "frac_alive": 0.71710205078125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16dfc685588ff19c11192e2d32fb9970f23c68b0
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01de966bee6924eeaadb356982edc47ee4ae5d8c402495317c21dfafaf2361f7
+size 100865046
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..929dede216c8793f48760402ed703029b628eabc
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
@@ -0,0 +1,28 @@
+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.04,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_15",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..aa7a4fd81668d0a15e4b211b2ce5fdc961182a7c
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 5.540974214852574, "l1_loss": 94.82435709022614, "l0": 95.3962563204478, "frac_variance_explained": 0.9552597058824746, "cossim": 0.9718739264700786, "l2_ratio": 0.9708090194736618, "relative_reconstruction_bias": 0.9997583469712591, "loss_original": 2.591329812285412, "loss_reconstructed": 2.7390204301799637, "loss_zero": 12.979128625019488, "frac_recovered": 0.9840897005724619, "frac_alive": 0.9267578125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c5f0ad1c398cf2d5ed98d5819fe0d1aabda9305
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2eb06802eee053782d3d338803d4f408ce84b0a1cc281748a266cb4f772a956e
+size 100865046
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6988289fa5ee59d4bff095a23f4500d4585b33a1
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
@@ -0,0 +1,28 @@
+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.06,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_16",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ad1111d0fb62b3c34b286aec188434ed9c20a8b3
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.3163925567305235, "l1_loss": 64.96194687808853, "l0": 48.62305409075266, "frac_variance_explained": 0.9414439212126904, "cossim": 0.9631719384566847, "l2_ratio": 0.9623617971517954, "relative_reconstruction_bias": 0.9998143462531538, "loss_original": 2.591329812285412, "loss_reconstructed": 2.8256030183240592, "loss_zero": 12.979128625019488, "frac_recovered": 0.9748218806393175, "frac_alive": 0.92987060546875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a0282bf0ad0583beea5c3b910b0299cae264efc
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:492aae9f99769dba8c556b726451e369b1eeb33b1baf558a90444dbfad618fb2
+size 100865046
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d3322fe9d83c3577d957e59743d04f7db5a8c362
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
@@ -0,0 +1,28 @@
+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.08,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_17",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..de769c10acdb9767a44d93c85a58246e51bad585
--- /dev/null
+++ b/GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.866190913211868, "l1_loss": 55.27316311755812, "l0": 30.90061481889472, "frac_variance_explained": 0.9303119121545769, "cossim": 0.9563094456511808, "l2_ratio": 0.9556090986154165, "relative_reconstruction_bias": 0.9998748388635107, "loss_original": 2.591329812285412, "loss_reconstructed": 2.912389753812767, "loss_zero": 12.979128625019488, "frac_recovered": 0.9655166954161173, "frac_alive": 0.86328125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..695543f9f986719615d3952af0f6339c39279c82
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35b66366e463e23602a12aa239decfaf82325071984f6e48fbfc53f1e97e1aac
+size 100799263
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c986b4e7b29e94907864c929f387db328c43c884
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_30",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 20
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3cb2d745c21e24eb40a19e006f5eb97d3a2aef20
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 7.542313641812428, "l1_loss": 59.5405657664839, "l0": 20.316411776715015, "frac_variance_explained": 0.9169782827417534, "cossim": 0.9468502043241478, "l2_ratio": 0.9470895029694201, "relative_reconstruction_bias": 1.0005664681813804, "loss_original": 2.591329812285412, "loss_reconstructed": 3.063094075903835, "loss_zero": 12.979128625019488, "frac_recovered": 0.9492282486823668, "frac_alive": 0.423095703125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e476d9d07e9142c7bfc00086d79fc350d471654
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52a73e6d5540b5666e46b3ce47c868f81846d1ac46cc3f2fd0ed174158fe95e3
+size 100799263
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4220d1a6a91306fc837ce5059290545df307e280
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_31",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 40
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..88ad7b289ff4c6fb87edbbc17af61d1d0eb0199f
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.596800962126399, "l1_loss": 68.14624850721245, "l0": 40.465813809130566, "frac_variance_explained": 0.9358430052378092, "cossim": 0.9595368070056639, "l2_ratio": 0.9597559456365654, "relative_reconstruction_bias": 1.0001378511808006, "loss_original": 2.591329812285412, "loss_reconstructed": 2.866633120071457, "loss_zero": 12.979128625019488, "frac_recovered": 0.9703360113752894, "frac_alive": 0.53546142578125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e4a128be831d42f4c79d74bb786956e597a554d
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b355bdff67e88e1ccf0c30ff781cd2408f566c203a2ffea6d730c47faf7e6958
+size 100799263
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..eb68293669c19aee70f4b394008a2e5c0ca01601
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_32",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 80
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..571a73c93b407b65a4ed10d85ba64b56019778dc
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 5.868561931403287, "l1_loss": 87.49280005765249, "l0": 73.93405321419957, "frac_variance_explained": 0.9491253247462124, "cossim": 0.9681477079908531, "l2_ratio": 0.9674936364214104, "relative_reconstruction_bias": 0.9984568217432642, "loss_original": 2.591329812285412, "loss_reconstructed": 2.771394150802888, "loss_zero": 12.979128625019488, "frac_recovered": 0.9805464475269777, "frac_alive": 0.52166748046875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09d0bae64f94ada4ab3bbf35031614c5ba3f33f0
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57b55991b9ed3a61bfbd12123f73db672397e9e3917e4eb1a14a0dc8e05792dc
+size 100799263
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..65793daf008f41644144ecd066d4653398d984f4
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_33",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 160
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a1177d398b120fad267d1d1d4229a736bba521bd
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.841383078012122, "l1_loss": 130.77086565867964, "l0": 155.94150111186934, "frac_variance_explained": 0.9658320416887123, "cossim": 0.9784838686506432, "l2_ratio": 0.978472747716559, "relative_reconstruction_bias": 1.0000444162322815, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6872186675129166, "loss_zero": 12.979128625019488, "frac_recovered": 0.9896216367382601, "frac_alive": 0.4205322265625, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3e50a3755441b5f2e0f84cd43e54740fff7005de
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:123ae303303f92498d1269b0e4c32df87c055d51b5bb87f6e7d5cbfbb23efca7
+size 100799263
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6314406a5d5a74a551d10f2f7e4bdcb2337721a3
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_34",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 320
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..406e4cb106faefcecff540f1c943aabfe7cc5524
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.403391051005168, "l1_loss": 212.64206833437265, "l0": 308.2046768004636, "frac_variance_explained": 0.9831308564507818, "cossim": 0.9895132137350289, "l2_ratio": 0.9899121196155088, "relative_reconstruction_bias": 1.0014854985547352, "loss_original": 2.591329812285412, "loss_reconstructed": 2.626830586467881, "loss_zero": 12.979128625019488, "frac_recovered": 0.9961226119334439, "frac_alive": 0.2725830078125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3482c5f046b0c3288e6eef93f5e2411084f49bc8
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c59bff0e7f2d1ed3fbd1422f75d9aa82695904fd3c546e70456c2dee09dc6332
+size 100799263
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..58b539832e6e460e609af1e27f5fb9d5c5047827
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_35",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 640
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..8508adaba69816e6c50e08bc08063f63e8b678a4
--- /dev/null
+++ b/JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 1.7013380031987846, "l1_loss": 438.71177857180675, "l0": 594.7365741040334, "frac_variance_explained": 0.9956594737897436, "cossim": 0.9974015067858868, "l2_ratio": 0.9970332251255771, "relative_reconstruction_bias": 0.9992425413016813, "loss_original": 2.591329812285412, "loss_reconstructed": 2.5989834702158547, "loss_zero": 12.979128625019488, "frac_recovered": 0.9991641662206995, "frac_alive": 0.0777587890625, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a47835b1dc8de6a926da4d4abe6b6a41b649e5f8
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5090748ecf6e9ffe4a64932b5780db4a88daf2ec7ee242e8cb0990a6a390b20d
+size 100734221
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2d66fce43059dfbda55eccbe31f6eeaeafe2d4cd
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
@@ -0,0 +1,53 @@
+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 20,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_0",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3ba404242e8b38b53cca6cde040571dc44d9bec4
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 7.503067392291444, "l1_loss": 57.648184805205375, "l0": 19.92774974938595, "frac_variance_explained": 0.9185879086003159, "cossim": 0.947629041744001, "l2_ratio": 0.9552449746565386, "relative_reconstruction_bias": 1.0052419864770137, "loss_original": 2.6064688870401094, "loss_reconstructed": 3.059862620902784, "loss_zero": 12.187079458525687, "frac_recovered": 0.9492561925541271, "frac_alive": 0.87738037109375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3d8f78bc8ade7877eace6f7986234f46460107a
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2dc8f124e90c04d8dbf1c2cd006759a68eb8f6a386d135d7e12069aea3931eb4
+size 100734221
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff5739666b24d95ee415dbb7147cacf6fe0cfd1b
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
@@ -0,0 +1,53 @@
+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 40,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..5525dca0fc11a57f0631f7dfecfacd7b04a7747b
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.681113040808476, "l1_loss": 75.43565345532966, "l0": 39.8682194334088, "frac_variance_explained": 0.9349244598186377, "cossim": 0.95862359530998, "l2_ratio": 0.9662664424289357, "relative_reconstruction_bias": 1.0057821707292036, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.883022969419306, "loss_zero": 12.187079458525687, "frac_recovered": 0.9689650336901346, "frac_alive": 0.9034423828125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f7715268f27165bb850b3ed910e2081fa360b57
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c8e6e60e103f69c509650e114bd54666baaa734ca06a694c90a33c5c83dd1b3
+size 100734221
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..994c32c07deff1b89be53c01679f4638693063b6
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
@@ -0,0 +1,53 @@
+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..d1b166782799ef0f1cd010874c2253ce905758e9
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 5.880217046448679, "l1_loss": 107.21432587594697, "l0": 79.61381715716738, "frac_variance_explained": 0.9493099487189091, "cossim": 0.968103901906447, "l2_ratio": 0.9761826793352762, "relative_reconstruction_bias": 1.0057118047367444, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.775832931200663, "loss_zero": 12.187079458525687, "frac_recovered": 0.981011531569741, "frac_alive": 0.87103271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e685a622b4ec81695b85c4e62cd9d5af5710617d
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:440e92fe914f875a73a4665859ef61a50183264f5eb036068812caaf8f162120
+size 100734221
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..85d3f3ecb55b356d703544a287ea41566b9bdf6f
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
@@ -0,0 +1,53 @@
+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..6da9ae07871eb1b6c8db4623d28ba7feefba53b9
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.954983465599291, "l1_loss": 193.02208964029947, "l0": 159.0796592018821, "frac_variance_explained": 0.9642201698187626, "cossim": 0.977724425720446, "l2_ratio": 0.9847773006468108, "relative_reconstruction_bias": 1.0048337452339404, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.7032747485420923, "loss_zero": 12.187079458525687, "frac_recovered": 0.9891494313875834, "frac_alive": 0.66717529296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..11ce46f9f54008a6e45a0433ee4c6c559baaa10f
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2660886927bb35a44af3ac7c4308c5aa438f8d5df6ee102ce7d98995dc94c9cb
+size 100734221
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..46eb41690b73c7acb988904efb9ab94fd472e90f
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
@@ -0,0 +1,53 @@
+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 320,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a43a43ab2460bfbf2d92e2666b822c3cd808b23c
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.53629664218787, "l1_loss": 326.76424246123344, "l0": 318.4963119969224, "frac_variance_explained": 0.9821194139393893, "cossim": 0.9888840588656339, "l2_ratio": 0.9927890228502678, "relative_reconstruction_bias": 1.002148378979076, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.643577243342544, "loss_zero": 12.187079458525687, "frac_recovered": 0.9958121306968458, "frac_alive": 0.2459716796875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..231ef297f8be55c8c19964dec9bfb5a757f97d60
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e6040d56b1128db7d560270eecaf952f5b97cd2c012fcac2321ce73976f0578
+size 100734221
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0a25f43a3c84afa5e88cf914d157ce6f8ee129f2
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
@@ -0,0 +1,53 @@
+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 640,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_5",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..037e6762491b842d43d5eacb234c66ab8693390f
--- /dev/null
+++ b/MatryoshkaBatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 1.4892961689920137, "l1_loss": 721.7255711410985, "l0": 638.8220381303267, "frac_variance_explained": 0.9966207399512782, "cossim": 0.9979786349065376, "l2_ratio": 0.9975055961897878, "relative_reconstruction_bias": 1.0003309213753901, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6123990217844644, "loss_zero": 12.187079458525687, "frac_recovered": 0.9993439009695342, "frac_alive": 0.05511474609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b30606e43ac598e305a8b23cfe2312c21c8e8be
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88efda2c782a996a9511c0499be5a98e666e0bce366533478099e38a0f6e9abe
+size 100733608
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cc8afec79d95c6f15093fd5478d6d3bf0fb9ad29
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
@@ -0,0 +1,35 @@
+{
+    "trainer": {
+        "trainer_class": "PAnnealTrainer",
+        "dict_class": "AutoEncoder",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "sparsity_function": "Lp^p",
+        "sparsity_penalty": 0.006,
+        "p_start": 1.0,
+        "p_end": 0.2,
+        "anneal_start": 10000,
+        "sparsity_queue_length": 10,
+        "n_sparsity_updates": 10,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "resample_steps": null,
+        "steps": 244140,
+        "seed": 0,
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "PAnnealTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_0",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c70b350c85c4608cb0b4587c33dcc1e6eb1a98ca
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.3622507919748146, "l1_loss": 161.849647154291, "l0": 401.211550609175, "frac_variance_explained": 0.98363655913307, "cossim": 0.9897847663925354, "l2_ratio": 0.9845664928476494, "relative_reconstruction_bias": 0.9970926865037665, "loss_original": 2.591329812285412, "loss_reconstructed": 2.632457171578005, "loss_zero": 12.979128625019488, "frac_recovered": 0.9955474386014134, "frac_alive": 0.65032958984375, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..30473e5f446631ff0df1a1d28609dccdf490bd9e
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6b4106db2d65bef813107285bf834ed71eaf5c8fc82879022548cba06677d79
+size 100733608
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a58aef71fc791c531e5525cdc923e8ddcd3862
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
@@ -0,0 +1,35 @@
+{
+    "trainer": {
+        "trainer_class": "PAnnealTrainer",
+        "dict_class": "AutoEncoder",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "sparsity_function": "Lp^p",
+        "sparsity_penalty": 0.008,
+        "p_start": 1.0,
+        "p_end": 0.2,
+        "anneal_start": 10000,
+        "sparsity_queue_length": 10,
+        "n_sparsity_updates": 10,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "resample_steps": null,
+        "steps": 244140,
+        "seed": 0,
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "PAnnealTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..bff0cf240b9ceba2577b30124040b9f5fe2eed47
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.5028303674904695, "l1_loss": 117.82930498238069, "l0": 255.23347252535532, "frac_variance_explained": 0.9703729996480137, "cossim": 0.9814757866313658, "l2_ratio": 0.9766738077244127, "relative_reconstruction_bias": 0.9983646517776581, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6821453894477294, "loss_zero": 12.979128625019488, "frac_recovered": 0.9901571744177715, "frac_alive": 0.65704345703125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f7e48b065e80ee5fa909a0d616aea6b2f11f30a
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dad50bccde025c7a6046b455a88b0e55df0339f49020426d1daa00968c7c3795
+size 100733608
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2bc78206e48690bf0c7ad88ac0462c5164a3ab32
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
@@ -0,0 +1,35 @@
+{
+    "trainer": {
+        "trainer_class": "PAnnealTrainer",
+        "dict_class": "AutoEncoder",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "sparsity_function": "Lp^p",
+        "sparsity_penalty": 0.01,
+        "p_start": 1.0,
+        "p_end": 0.2,
+        "anneal_start": 10000,
+        "sparsity_queue_length": 10,
+        "n_sparsity_updates": 10,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "resample_steps": null,
+        "steps": 244140,
+        "seed": 0,
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "PAnnealTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..540f977eb66eacdbe4d1a0af8829f2146041f8ac
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 5.202926089964717, "l1_loss": 93.26969257033015, "l0": 167.94699023143355, "frac_variance_explained": 0.9601707979139075, "cossim": 0.9751063757632152, "l2_ratio": 0.9704936541706682, "relative_reconstruction_bias": 0.9973056086574692, "loss_original": 2.591329812285412, "loss_reconstructed": 2.7267462905631006, "loss_zero": 12.979128625019488, "frac_recovered": 0.9853492372007255, "frac_alive": 0.6607666015625, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb888414ff55dbbfeef97c9a84a6f1659df1edec
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daaf6b8be0f0cef67781a9bf4d91cd5d3562fe23cfd66bda390c0cac573b5fd2
+size 100733608
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3429fc0d9acb39b57f991fa3455e8b89d1662fd4
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
@@ -0,0 +1,35 @@
+{
+    "trainer": {
+        "trainer_class": "PAnnealTrainer",
+        "dict_class": "AutoEncoder",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "sparsity_function": "Lp^p",
+        "sparsity_penalty": 0.015,
+        "p_start": 1.0,
+        "p_end": 0.2,
+        "anneal_start": 10000,
+        "sparsity_queue_length": 10,
+        "n_sparsity_updates": 10,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "resample_steps": null,
+        "steps": 244140,
+        "seed": 0,
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "PAnnealTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff9fdd150243a23020546a79f50f302e148f1f75
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.263203859329224, "l1_loss": 64.18408386965832, "l0": 73.46823607295393, "frac_variance_explained": 0.9419336117893816, "cossim": 0.9636158436895853, "l2_ratio": 0.9587060428527464, "relative_reconstruction_bias": 0.9977090491587857, "loss_original": 2.591329812285412, "loss_reconstructed": 2.8357038907257905, "loss_zero": 12.979128625019488, "frac_recovered": 0.973673836653491, "frac_alive": 0.6658935546875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3659d5978de63ec56024a3335d5709277df7734d
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5903f377dbd7c451f953ade79bdd870a482ef70c2d1619d643f4068d771143cb
+size 100733608
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cfaf59f27051d6a233dcc3a6dd5ecba541efbc49
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
@@ -0,0 +1,35 @@
+{
+    "trainer": {
+        "trainer_class": "PAnnealTrainer",
+        "dict_class": "AutoEncoder",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "sparsity_function": "Lp^p",
+        "sparsity_penalty": 0.02,
+        "p_start": 1.0,
+        "p_end": 0.2,
+        "anneal_start": 10000,
+        "sparsity_queue_length": 10,
+        "n_sparsity_updates": 10,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "resample_steps": null,
+        "steps": 244140,
+        "seed": 0,
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "PAnnealTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a132ebc4bc280e415e77b091260a1f177d57bafe
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.8961237625903395, "l1_loss": 51.952152091336536, "l0": 41.2746576056423, "frac_variance_explained": 0.929740074528269, "cossim": 0.9557553210172308, "l2_ratio": 0.95038376586983, "relative_reconstruction_bias": 0.9986502840576401, "loss_original": 2.591329812285412, "loss_reconstructed": 2.9467881781509124, "loss_zero": 12.979128625019488, "frac_recovered": 0.9618090857942421, "frac_alive": 0.66766357421875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0c2ccdc31f3a0dc2a547da1ab6bceda74471436
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1848070778207251469acda3d933993543de5d68a38194a37ecb2792e5b4f813
+size 100733608
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0c7871a7475550f23545e239b1b40aad3d032ce4
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
@@ -0,0 +1,35 @@
+{
+    "trainer": {
+        "trainer_class": "PAnnealTrainer",
+        "dict_class": "AutoEncoder",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "sparsity_function": "Lp^p",
+        "sparsity_penalty": 0.025,
+        "p_start": 1.0,
+        "p_end": 0.2,
+        "anneal_start": 10000,
+        "sparsity_queue_length": 10,
+        "n_sparsity_updates": 10,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "resample_steps": null,
+        "steps": 244140,
+        "seed": 0,
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "PAnnealTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_5",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..f81e7b35f5a9e48dbf77d88d86b97d2d7922a5e2
--- /dev/null
+++ b/PAnneal_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 7.323505956006337, "l1_loss": 45.781148060258616, "l0": 27.82367721235896, "frac_variance_explained": 0.9210811422531864, "cossim": 0.9500051370586258, "l2_ratio": 0.9441123252891632, "relative_reconstruction_bias": 0.9976531385657299, "loss_original": 2.591329812285412, "loss_reconstructed": 3.0392324558223587, "loss_zero": 12.979128625019488, "frac_recovered": 0.951921901070928, "frac_alive": 0.6649169921875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dda4b6c616aede0d43acccd29febc2ebb6ccc2f5
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:637d7a74d75dfeb5611865e6eefd62af3ff14bd794e71bb23b4b870eb3d66823
+size 100733608
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..16f852c39d973f982f62e36ac304cc2b9d788fac
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.012,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "StandardTrainerNew-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_6",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ba1bc994a2f285411f1d0bf09ac6e9ed595b1b8
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.413027272166976, "l1_loss": 86.36697617496353, "l0": 693.245662459408, "frac_variance_explained": 0.9715100422681097, "cossim": 0.9828673177454845, "l2_ratio": 0.9533848065927805, "relative_reconstruction_bias": 0.9820587484233351, "loss_original": 2.591329812285412, "loss_reconstructed": 2.683495038963226, "loss_zero": 12.979128625019488, "frac_recovered": 0.9900326175862048, "frac_alive": 0.74920654296875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bbe4302b832bef91c160b1f5a3122b6c797d73d
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a64e454c445dde7f1db008c35629050bbc1b324f63915a78f6b6bdd0a8d4eb
+size 100733608
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cf1b8cde6fe988ed27e1ca4b15145b3b45014489
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.015,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "StandardTrainerNew-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_7",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c9d6488e7c16cd03af3ab5e0ca2cd4f261f6261e
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.928134016243808, "l1_loss": 74.24602200611528, "l0": 495.6307437391166, "frac_variance_explained": 0.9644140550889164, "cossim": 0.9784741110830422, "l2_ratio": 0.946569280811103, "relative_reconstruction_bias": 0.9802226797643915, "loss_original": 2.591329812285412, "loss_reconstructed": 2.7142408533268663, "loss_zero": 12.979128625019488, "frac_recovered": 0.9867081437484327, "frac_alive": 0.748779296875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..13b497e7f5d0a5982a60af454dea47e0af2b6de0
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2cc97dfc1553cda52477ef47bbd12bfd3d9b67cd84d00f299f16e760de4b4aa
+size 100733608
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..71825763c18ebd3144e3a958bd8518d834819b87
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.02,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "StandardTrainerNew-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_8",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..03d120debf579edfb258d5ed87b7d38295153443
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 5.58367193463337, "l1_loss": 60.69475557717932, "l0": 309.40750894201807, "frac_variance_explained": 0.9543272741587765, "cossim": 0.9720740648637335, "l2_ratio": 0.937349563980677, "relative_reconstruction_bias": 0.9785354941724295, "loss_original": 2.591329812285412, "loss_reconstructed": 2.765607705317348, "loss_zero": 12.979128625019488, "frac_recovered": 0.9812187724802868, "frac_alive": 0.7474365234375, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9670735a4ecdebc28b78fc517a92281626bf1e0b
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ff951fbe8914ed549cade9cde11e67532a9b930ab2fa67cc99aa2127fabef36
+size 100733608
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..057b563135847c087bfcbf02006f3297d862fc55
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.03,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "StandardTrainerNew-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_9",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..062d028c32dd73261745b63931b7b2f2d415334a
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.448484552911966, "l1_loss": 46.181850984872106, "l0": 154.64578734248516, "frac_variance_explained": 0.9382677713790571, "cossim": 0.9623248900275633, "l2_ratio": 0.9227385366537485, "relative_reconstruction_bias": 0.9744020757904972, "loss_original": 2.591329812285412, "loss_reconstructed": 2.869060355496694, "loss_zero": 12.979128625019488, "frac_recovered": 0.9701569353241518, "frac_alive": 0.74444580078125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..945bce018d21cccc6325d7a61642d2dfb92ff49f
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4abc8a0c9654e2036ee258402caf31b812b0bb67276a18f9aaf728e600751475
+size 100733608
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2d26d902a8ff3b2f84e71e6b1ed95b25d7cf0746
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.04,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "StandardTrainerNew-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_10",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..677885f825544382eed6e99912634f1f7443ded1
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 7.007212920361255, "l1_loss": 38.88443358547716, "l0": 96.43636142776673, "frac_variance_explained": 0.9274044618549118, "cossim": 0.9552818676793432, "l2_ratio": 0.911165978176048, "relative_reconstruction_bias": 0.9716554108154343, "loss_original": 2.591329812285412, "loss_reconstructed": 2.9624105699091072, "loss_zero": 12.979128625019488, "frac_recovered": 0.9601952480264457, "frac_alive": 0.74041748046875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..76d1cbb996c5147a7ec7c24e44ab330448a696eb
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d017a06d11ba8a400b8df31cf9f233ecc02488257dd59e369485bf785732bb2
+size 100733608
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..816096af22ba35bf04d9b18cfc8c716645ea9849
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
@@ -0,0 +1,29 @@
+{
+    "trainer": {
+        "dict_class": "AutoEncoder",
+        "trainer_class": "StandardTrainerAprilUpdate",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.06,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "steps": 244140,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "StandardTrainerNew-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_11",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e1f6f77521dba8b4ddfe4e354c4d67786aa91c32
--- /dev/null
+++ b/Standard_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 7.761060510773256, "l1_loss": 31.28638026225998, "l0": 51.94378997618894, "frac_variance_explained": 0.9111872135874737, "cossim": 0.9451250573956823, "l2_ratio": 0.891550215971039, "relative_reconstruction_bias": 0.9651189223829523, "loss_original": 2.591329812285412, "loss_reconstructed": 3.1271837712770485, "loss_zero": 12.979128625019488, "frac_recovered": 0.9424630604594587, "frac_alive": 0.7325439453125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2823645518bd39dcfdabe846d84461e3d3c5af82
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab84d876da1b82169bb0f62636a8978a0b650df97e68406d5665d72630c56808
+size 100733974
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b51618f24a577c78b72e258d6a14478a8fe5fb66
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json
@@ -0,0 +1,31 @@
+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 20,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "TopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_0",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..b55121944c00ea81d3a9b50571e769bfb521dbea
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 7.10956572041367, "l1_loss": 51.419726747455016, "l0": 19.99874230587121, "frac_variance_explained": 0.9261308095671914, "cossim": 0.9529453786936674, "l2_ratio": 0.9528289130239775, "relative_reconstruction_bias": 1.0015099120862556, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.9800217296137954, "loss_zero": 12.187079458525687, "frac_recovered": 0.9583556218580767, "frac_alive": 0.97100830078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..67b43e57abffb5f73ae86c66e5c41f2246405cbd
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbc713e9ac91a6e19eaf21cea22a5b7163b6a8270d25e589aba4e168dba9917b
+size 100733974
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..aa13445c97a43918a608d8ed0e80d37b90196ddf
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json
@@ -0,0 +1,31 @@
+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 40,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "TopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..540471563fdf92a4fb779e354a36cfc151df0a76
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 6.298781915144487, "l1_loss": 65.19652025627367, "l0": 39.99936976577296, "frac_variance_explained": 0.9413592273538763, "cossim": 0.9631030758221945, "l2_ratio": 0.9631146662163012, "relative_reconstruction_bias": 1.0009909868240356, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.843784288926558, "loss_zero": 12.187079458525687, "frac_recovered": 0.9734502001242205, "frac_alive": 0.99285888671875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff756e7971b8c3593470d48b4d5cf03959e5a6d8
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd85a9da075fefc69162072f4024a0a3e17a0040b371a783a117507d21e8bbb0
+size 100733974
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2f0397f73c00248c66c8dd0fc295a0b5849ee2c6
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json
@@ -0,0 +1,31 @@
+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "TopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c54e69d95d57b54e2c6bfa307f958be75cfad5ca
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 5.48234257553563, "l1_loss": 88.40633161140211, "l0": 79.98960506554806, "frac_variance_explained": 0.9552180875431407, "cossim": 0.9720208825487079, "l2_ratio": 0.972093170339411, "relative_reconstruction_bias": 1.0002325664867053, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.7563261480042427, "loss_zero": 12.187079458525687, "frac_recovered": 0.9832018993117593, "frac_alive": 0.9927978515625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..08f3f014353c0386033c3c0cb691b3a105dc350d
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08529759e40db392f2e3b7a187bc51c1c033395f9c097975d1368215dbc096af
+size 100733974
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed15e40305f3c40e8f7c52fb5113effe03e84090
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json
@@ -0,0 +1,31 @@
+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "TopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3fec1840033465a734debe0f09cae295b138ba43
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 4.540305412176884, "l1_loss": 156.59205766157672, "l0": 159.97946166992188, "frac_variance_explained": 0.9691928065184391, "cossim": 0.9808063272273901, "l2_ratio": 0.9807897011439005, "relative_reconstruction_bias": 1.0003096334861987, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6975065975478203, "loss_zero": 12.187079458525687, "frac_recovered": 0.9897829980561228, "frac_alive": 0.94097900390625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6fe1c1b1350daf809563357dedaa3452a0e97b5d
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45446e7dd3803d4145d6aa83dbe4ed406798232863ecbb4ea467cc796988ac41
+size 100733974
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..871240ce93caeca19614b54f6d20cf9ffe52d796
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json
@@ -0,0 +1,31 @@
+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 320,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "TopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e153f12f40f2f0fa9aa9c4a8f053958dd7b6a747
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 3.127272938237046, "l1_loss": 286.77250070282906, "l0": 320.0, "frac_variance_explained": 0.98568051330971, "cossim": 0.9909948363448634, "l2_ratio": 0.9907523158824805, "relative_reconstruction_bias": 0.9999155582803668, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6417208006887725, "loss_zero": 12.187079458525687, "frac_recovered": 0.9960357546806335, "frac_alive": 0.4578857421875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8acced3f3aee0ad8bcb1d6bc58b09210c89feeaa
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30dc6ca83384efc071536423f981653499aeb1b6d8c34cb812ec4b2f92bd9055
+size 100733974
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ae6c2d487f9a5cfb9ab442acd13243a5420a5e7f
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json
@@ -0,0 +1,31 @@
+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 640,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "TopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_5",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}
\ No newline at end of file
diff --git a/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a2bd1080e742cedc8bff7ccc6de470c1ecef6b82
--- /dev/null
+++ b/TopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json
@@ -0,0 +1 @@
+{"l2_loss": 1.6481144536625256, "l1_loss": 607.7699584960938, "l0": 639.8206953568892, "frac_variance_explained": 0.995967478463144, "cossim": 0.9975080237244115, "l2_ratio": 0.9976696462342234, "relative_reconstruction_bias": 0.9993832635157036, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6151728268825645, "loss_zero": 12.187079458525687, "frac_recovered": 0.9990135232607523, "frac_alive": 0.06243896484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
\ No newline at end of file