adamkarvonen commited on Feb 4

Commit

00189a8

verified ·

1 Parent(s): febe0e2

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json +32 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json +1 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json +32 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json +1 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json +32 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json +1 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json +32 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json +1 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json +32 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json +1 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json +32 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json +1 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json +28 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json +1 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json +28 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json +1 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json +28 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json +1 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json +28 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json +1 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json +28 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json +1 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json +28 -0
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json +1 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json +29 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json +1 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json +29 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json +1 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json +29 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json +1 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json +29 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json +1 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json +29 -0

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:731f9e5cdb09067bf8e15f162aff6438e7e79db5a9987c735e4b28b97fe7d2b7
+size 100733974

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 20,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_0",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 7.165702819824219, "l1_loss": 52.398841048731946, "l0": 19.910211158521246, "frac_variance_explained": 0.9256864298473705, "cossim": 0.9520920298316262, "l2_ratio": 0.9514842954548922, "relative_reconstruction_bias": 0.9993569128441088, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.9917597409450645, "loss_zero": 12.187079458525687, "frac_recovered": 0.9568525624997688, "frac_alive": 0.933837890625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da5a8f71335eba31afa1eca0d8b6f01790dec259246cbd6bc7225951a5486d9a
+size 100733974

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 40,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 6.358675190896699, "l1_loss": 67.62095688328598, "l0": 39.82553227742513, "frac_variance_explained": 0.9409584890712391, "cossim": 0.9623859289920691, "l2_ratio": 0.9620392033548066, "relative_reconstruction_bias": 0.9997165492086699, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.852238994656187, "loss_zero": 12.187079458525687, "frac_recovered": 0.9724247419472897, "frac_alive": 0.92108154296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe65f3e4ad7fe582508b307625716ff197a04a3d371834c7222c023169597fbd
+size 100733974

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 80,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 5.535941615249172, "l1_loss": 90.0740072076971, "l0": 79.61385576652758, "frac_variance_explained": 0.9549660375623992, "cossim": 0.9716276183272853, "l2_ratio": 0.9715044552629645, "relative_reconstruction_bias": 1.0009427467981975, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.7588755361961597, "loss_zero": 12.187079458525687, "frac_recovered": 0.9828645659215522, "frac_alive": 0.873291015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62d31b3f692684c53b5c5e8ff60b739baa9a0daf57abe707da33701733063e7d
+size 100733974

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 160,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 4.596404407963608, "l1_loss": 153.62816642992425, "l0": 159.1172462232185, "frac_variance_explained": 0.9690298311638109, "cossim": 0.98062437953371, "l2_ratio": 0.9803878791404493, "relative_reconstruction_bias": 1.0007607864611077, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6916814464511294, "loss_zero": 12.187079458525687, "frac_recovered": 0.9904219201116851, "frac_alive": 0.7470703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff290fc3b27d35eee2431b923928da0a5530eaf02380cc605a3f04aa3bc61287
+size 100733974

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 320,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 3.1190587029312598, "l1_loss": 295.7740866921165, "l0": 318.5566424745502, "frac_variance_explained": 0.9860216288855581, "cossim": 0.9912758794697848, "l2_ratio": 0.9912153554685188, "relative_reconstruction_bias": 1.0004844918395535, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.63657668503848, "loss_zero": 12.187079458525687, "frac_recovered": 0.9965930906209078, "frac_alive": 0.32232666015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:405bcdf303076eb4e67b6dcd34a2c041278a2574400fd19e2deb02eb9df12560
+size 100733974

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 384,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "k": 640,
+        "device": "cuda:1",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_5",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:1"
+    }
+}

BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 1.5527055624759558, "l1_loss": 626.5316938920455, "l0": 639.1975338097775, "frac_variance_explained": 0.9963156291932771, "cossim": 0.9977991869955352, "l2_ratio": 0.9976590293826479, "relative_reconstruction_bias": 1.0002403476021506, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6137762756058662, "loss_zero": 12.187079458525687, "frac_recovered": 0.9991628256711093, "frac_alive": 0.06146240234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50b23e31ad3ee499518d7bd643be829a3ee7b86b236059a0146799d475bb1c6c
+size 100865046

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.012,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_12",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 2.449277211384601, "l1_loss": 233.9408302766731, "l0": 483.06496126105986, "frac_variance_explained": 0.9913476149001753, "cossim": 0.9946498181446489, "l2_ratio": 0.9939263398388782, "relative_reconstruction_bias": 0.9993272940796541, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6073691378156822, "loss_zero": 12.979128625019488, "frac_recovered": 0.9982530004288777, "frac_alive": 0.59857177734375, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c989b27ea8ba899f086eb1e4bbf13a21a3e8ebc86900fb5d588fd7f804b814f4
+size 100865046

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.018,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_13",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 3.718252821140979, "l1_loss": 174.70882599612315, "l0": 357.33407280244023, "frac_variance_explained": 0.9797811321465366, "cossim": 0.9874855103981064, "l2_ratio": 0.9873908984373851, "relative_reconstruction_bias": 1.0000614502343788, "loss_original": 2.591329812285412, "loss_reconstructed": 2.638556733906987, "loss_zero": 12.979128625019488, "frac_recovered": 0.9948906988264566, "frac_alive": 0.55438232421875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0818ec859a15998633c73ad5dce110641f60f0b7b999aafd459d8bcaafa407d
+size 100865046

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.024,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_14",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 4.538920862128935, "l1_loss": 141.84569384103798, "l0": 227.8396312299981, "frac_variance_explained": 0.9699712948626783, "cossim": 0.9812705624534424, "l2_ratio": 0.98167203241084, "relative_reconstruction_bias": 1.000891358737486, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6714729882148376, "loss_zero": 12.979128625019488, "frac_recovered": 0.9913170700331768, "frac_alive": 0.71710205078125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01de966bee6924eeaadb356982edc47ee4ae5d8c402495317c21dfafaf2361f7
+size 100865046

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.04,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_15",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 5.540974214852574, "l1_loss": 94.82435709022614, "l0": 95.3962563204478, "frac_variance_explained": 0.9552597058824746, "cossim": 0.9718739264700786, "l2_ratio": 0.9708090194736618, "relative_reconstruction_bias": 0.9997583469712591, "loss_original": 2.591329812285412, "loss_reconstructed": 2.7390204301799637, "loss_zero": 12.979128625019488, "frac_recovered": 0.9840897005724619, "frac_alive": 0.9267578125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2eb06802eee053782d3d338803d4f408ce84b0a1cc281748a266cb4f772a956e
+size 100865046

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.06,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_16",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 6.3163925567305235, "l1_loss": 64.96194687808853, "l0": 48.62305409075266, "frac_variance_explained": 0.9414439212126904, "cossim": 0.9631719384566847, "l2_ratio": 0.9623617971517954, "relative_reconstruction_bias": 0.9998143462531538, "loss_original": 2.591329812285412, "loss_reconstructed": 2.8256030183240592, "loss_zero": 12.979128625019488, "frac_recovered": 0.9748218806393175, "frac_alive": 0.92987060546875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:492aae9f99769dba8c556b726451e369b1eeb33b1baf558a90444dbfad618fb2
+size 100865046

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "trainer": {
+        "dict_class": "GatedAutoEncoder",
+        "trainer_class": "GatedSAETrainer",
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "lr": 0.0003,
+        "l1_penalty": 0.08,
+        "warmup_steps": 1000,
+        "sparsity_warmup_steps": 5000,
+        "decay_start": 195312,
+        "seed": 0,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_17",
+        "submodule_name": "resid_post_layer_8"
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 6.866190913211868, "l1_loss": 55.27316311755812, "l0": 30.90061481889472, "frac_variance_explained": 0.9303119121545769, "cossim": 0.9563094456511808, "l2_ratio": 0.9556090986154165, "relative_reconstruction_bias": 0.9998748388635107, "loss_original": 2.591329812285412, "loss_reconstructed": 2.912389753812767, "loss_zero": 12.979128625019488, "frac_recovered": 0.9655166954161173, "frac_alive": 0.86328125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35b66366e463e23602a12aa239decfaf82325071984f6e48fbfc53f1e97e1aac
+size 100799263

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_30",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 20
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 7.542313641812428, "l1_loss": 59.5405657664839, "l0": 20.316411776715015, "frac_variance_explained": 0.9169782827417534, "cossim": 0.9468502043241478, "l2_ratio": 0.9470895029694201, "relative_reconstruction_bias": 1.0005664681813804, "loss_original": 2.591329812285412, "loss_reconstructed": 3.063094075903835, "loss_zero": 12.979128625019488, "frac_recovered": 0.9492282486823668, "frac_alive": 0.423095703125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52a73e6d5540b5666e46b3ce47c868f81846d1ac46cc3f2fd0ed174158fe95e3
+size 100799263

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_31",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 40
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 6.596800962126399, "l1_loss": 68.14624850721245, "l0": 40.465813809130566, "frac_variance_explained": 0.9358430052378092, "cossim": 0.9595368070056639, "l2_ratio": 0.9597559456365654, "relative_reconstruction_bias": 1.0001378511808006, "loss_original": 2.591329812285412, "loss_reconstructed": 2.866633120071457, "loss_zero": 12.979128625019488, "frac_recovered": 0.9703360113752894, "frac_alive": 0.53546142578125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b355bdff67e88e1ccf0c30ff781cd2408f566c203a2ffea6d730c47faf7e6958
+size 100799263

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_32",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 80
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 5.868561931403287, "l1_loss": 87.49280005765249, "l0": 73.93405321419957, "frac_variance_explained": 0.9491253247462124, "cossim": 0.9681477079908531, "l2_ratio": 0.9674936364214104, "relative_reconstruction_bias": 0.9984568217432642, "loss_original": 2.591329812285412, "loss_reconstructed": 2.771394150802888, "loss_zero": 12.979128625019488, "frac_recovered": 0.9805464475269777, "frac_alive": 0.52166748046875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57b55991b9ed3a61bfbd12123f73db672397e9e3917e4eb1a14a0dc8e05792dc
+size 100799263

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_33",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 160
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 4.841383078012122, "l1_loss": 130.77086565867964, "l0": 155.94150111186934, "frac_variance_explained": 0.9658320416887123, "cossim": 0.9784838686506432, "l2_ratio": 0.978472747716559, "relative_reconstruction_bias": 1.0000444162322815, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6872186675129166, "loss_zero": 12.979128625019488, "frac_recovered": 0.9896216367382601, "frac_alive": 0.4205322265625, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:123ae303303f92498d1269b0e4c32df87c055d51b5bb87f6e7d5cbfbb23efca7
+size 100799263

JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "trainer": {
+        "trainer_class": "JumpReluTrainer",
+        "dict_class": "JumpReluAutoEncoder",
+        "lr": 0.0003,
+        "steps": 244140,
+        "seed": 0,
+        "activation_dim": 768,
+        "dict_size": 16384,
+        "device": "cuda:0",
+        "layer": 8,
+        "lm_name": "EleutherAI/pythia-160m-deduped",
+        "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_34",
+        "submodule_name": "resid_post_layer_8",
+        "bandwidth": 0.001,
+        "sparsity_penalty": 1.0,
+        "sparsity_warmup_steps": 5000,
+        "target_l0": 320
+    },
+    "buffer": {
+        "d_submodule": 768,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 32,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}