diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d2bde84c47ba5b1fb750c76c355f0f5e8145c73 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a53998d60d76090fb5f52a4b8f7f9e5993c4747484a0ebf9ca85368df9fbc0 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aa3be4e2cf7e33b42536c63e78eb0e0b32d36f7d --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..17f41d3e65d59943c63ee21f42761b426a15e692 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.8880099132657051, "l1_loss": 24.71984565258026, "l0": 289.50769233703613, "frac_variance_explained": 0.9763443879783154, "cossim": 0.992082379758358, "l2_ratio": 0.9707153402268887, "relative_reconstruction_bias": 0.9786949455738068, "loss_original": 2.988885059952736, "loss_reconstructed": 3.016266644001007, "loss_zero": 8.828418850898743, "frac_recovered": 0.9953007213771343, "frac_alive": 0.7119140625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..84e0621e7ff9e016370818f533b9914408cad073 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9f5bcb3f911be5660637af45cfb788366b3a921fd989b4adec1c90dca78e69 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1322dd6d6df3acd6a7a176a9a8dedd7cb4eaf6ca --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3b99c9fce0c7ce1d6abe2d03c8437d6212abbcde --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.9838254600763321, "l1_loss": 22.220465064048767, "l0": 232.73355770111084, "frac_variance_explained": 0.9710042513906956, "cossim": 0.9902598187327385, "l2_ratio": 0.9662889577448368, "relative_reconstruction_bias": 0.9760206341743469, "loss_original": 2.988885059952736, "loss_reconstructed": 3.024588406085968, "loss_zero": 8.828418850898743, "frac_recovered": 0.9938735403120518, "frac_alive": 0.7171630859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..261481bc20806b13acf2808ad0a937f4007a02d3 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46421d60d7e5e3802d52e75af8f4392e5ad4cd6b0c25058dd11dbaf36ca4cb51 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..10c40a189a12398a62db49a2b5a4ca775d875517 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b1a4371f4096e72048555c19e2dbd1fe40523c79 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.1100959181785583, "l1_loss": 19.30671751499176, "l0": 169.3354959487915, "frac_variance_explained": 0.9629898369312286, "cossim": 0.9875496290624142, "l2_ratio": 0.9594511836767197, "relative_reconstruction_bias": 0.971693217754364, "loss_original": 2.988885059952736, "loss_reconstructed": 3.035519316792488, "loss_zero": 8.828418850898743, "frac_recovered": 0.9920109137892723, "frac_alive": 0.7247314453125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..98b025d307d1e083fdb1459b9f2f4c5bad19d24c --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aedba6b88aece44504d5278055fc7095c799878107f9b1f3e7e1284655e9b7f +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..42c91264ac7c9fdef1a33433035228c1f4bd7d39 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f4b356b6eaa643f62b8cd75e7ca01cdf59f024d4 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.2951818630099297, "l1_loss": 15.69888824224472, "l0": 103.05662822723389, "frac_variance_explained": 0.9493058025836945, "cossim": 0.9829495437443256, "l2_ratio": 0.9483063369989395, "relative_reconstruction_bias": 0.9647174514830112, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0596881806850433, "loss_zero": 8.828418850898743, "frac_recovered": 0.9878532290458679, "frac_alive": 0.73175048828125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c8da9be346f55cf30ba55cbdf1545f51204a104 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ddf297c7acecace4c111bda5f154add817584cb99367df5d1555cbabd344ed3 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db414b5f306d4badd42cc687a49a2c50a69cd459 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8ce9f1c207719fbf75b62ba49fbaf1edc078e8e0 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.4320023581385612, "l1_loss": 13.441674709320068, "l0": 70.04790019989014, "frac_variance_explained": 0.9376466050744057, "cossim": 0.9790421277284622, "l2_ratio": 0.9389512911438942, "relative_reconstruction_bias": 0.9588979072868824, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0810854732990265, "loss_zero": 8.828418850898743, "frac_recovered": 0.9842088855803013, "frac_alive": 0.736328125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a72df91d9c668a02fbd048a2c030d1c15a6d1c61 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262ad703e05eff4dcbcb23e0d0c8f699830b629590e3911ff7c481b6a727c3de +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..65484f3b0ec39004b62940ff20e1d1a5499cde48 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3354ec18837e3519ac8a44403f87e08a4d595dbc --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.6484556645154953, "l1_loss": 10.752245247364044, "l0": 39.29365658760071, "frac_variance_explained": 0.9167096689343452, "cossim": 0.9719779714941978, "l2_ratio": 0.9227463938295841, "relative_reconstruction_bias": 0.9491799250245094, "loss_original": 2.988885059952736, "loss_reconstructed": 3.131672754883766, "loss_zero": 8.828418850898743, "frac_recovered": 0.9755509421229362, "frac_alive": 0.73443603515625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5269436e9673e1825da0b057ea6f01bd59e2c7a --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b08da4344dfcf1007773109a0e3c6940a53cb334bb16dbd0eedf3e8807addae +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d79b3a59ac5a8f5385b550e156152ca1f4c26390 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ca8f9b43ef791dd2b898784a5a5431c97dcd2cb9 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.3996912688016891, "l1_loss": 42.294498443603516, "l0": 848.6530647277832, "frac_variance_explained": 0.9665217883884907, "cossim": 0.9879145994782448, "l2_ratio": 0.9566630609333515, "relative_reconstruction_bias": 0.970756758004427, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0269704312086105, "loss_zero": 8.97075641155243, "frac_recovered": 0.9936139024794102, "frac_alive": 0.69598388671875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..796076b8f2d7ef3d778c21df65f91e3ff8ccf8b7 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08dabf4afc722af480fa50ea4c71aeea6a686b413f88b8daa0cd6e6c54497526 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..69cfc749bfe944ad105dd12b9bea5432705b80c1 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a59f81213b217924aaf8fda4ee9ad7a5ed5d1f56 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.5650566518306732, "l1_loss": 37.678173542022705, "l0": 667.7053680419922, "frac_variance_explained": 0.9581456147134304, "cossim": 0.9848296083509922, "l2_ratio": 0.9501992017030716, "relative_reconstruction_bias": 0.9674350023269653, "loss_original": 2.988885059952736, "loss_reconstructed": 3.038178339600563, "loss_zero": 8.97075641155243, "frac_recovered": 0.9917236343026161, "frac_alive": 0.705078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9023341170b6438d4aef36cf8a82984b72e7472f --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4f6e0256082fd3383b69641e8c1bbdef418eeafdc2ac7e3a8aec772d945261 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d393f261e0419bfe8f65b2206c6bb0a914bdaa3 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b05fa3c457da4da309fec3d44793bcd781261b18 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.7955679446458817, "l1_loss": 31.955636978149414, "l0": 463.37560272216797, "frac_variance_explained": 0.9448460824787617, "cossim": 0.9799039028584957, "l2_ratio": 0.9401619769632816, "relative_reconstruction_bias": 0.9621202535927296, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0586147010326385, "loss_zero": 8.97075641155243, "frac_recovered": 0.9883030280470848, "frac_alive": 0.71124267578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..62b0fb1693a2644126a68788226c8620896756a9 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0600706d266241f97018ce9cc759dce3a5522e6158ceef7295b6f21093855ab9 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b91d161fb38f444089ce089e504cc89ea8e94545 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..25ff094ad7783b99c642e318acb1f39f4f2bbd1a --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.1483198404312134, "l1_loss": 24.6392685174942, "l0": 251.70951080322266, "frac_variance_explained": 0.9206176958978176, "cossim": 0.9708879068493843, "l2_ratio": 0.9243263490498066, "relative_reconstruction_bias": 0.9547557085752487, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0947447419166565, "loss_zero": 8.97075641155243, "frac_recovered": 0.9822593331336975, "frac_alive": 0.7169189453125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f082e52bc320cb3f696f8d2ef52aec85b4f8811 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03813180d194a02464de0c2e3ca2a94516a58d766df99d15bc9bf20ae5d87519 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..df5a1e9cedb26b39f78a77db3d84ca21389d8878 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..428c04d13669c000003f0ccb0c174b963eba6ec6 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.406396448612213, "l1_loss": 20.208199858665466, "l0": 153.5412302017212, "frac_variance_explained": 0.9000130482017994, "cossim": 0.9631378278136253, "l2_ratio": 0.9116865955293179, "relative_reconstruction_bias": 0.9493150822818279, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1343201994895935, "loss_zero": 8.97075641155243, "frac_recovered": 0.9756543561816216, "frac_alive": 0.71405029296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..535f637069777606f097462efa8b03eac775e136 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b80de1f4b215560289634e7a53b3ba1cadeb76cd53f4b9ed75be1d808afcc31 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fa83e2fdaac2f38cbbb07d2df2ac25342ae75b81 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0eac197ee01f11b243dc630ef9199e827046c088 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.771856963634491, "l1_loss": 15.305355727672577, "l0": 73.31960344314575, "frac_variance_explained": 0.8663658648729324, "cossim": 0.9504912123084068, "l2_ratio": 0.8903653882443905, "relative_reconstruction_bias": 0.9395905695855618, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2098177671432495, "loss_zero": 8.97075641155243, "frac_recovered": 0.9631249755620956, "frac_alive": 0.68414306640625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4926b426d07f89540d3ba0b533acaff4f58a8e30 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9696848df3866e7f6ede6a8f00e462144c0277b9554abf81ac1810b7e2273ea7 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f5b3a0112ed38d13e927e03ebfe3ab701868209e --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2b212afefef6378509139f594d9bbd309635b44c --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.112189382314682, "l1_loss": 56.772852659225464, "l0": 828.3504066467285, "frac_variance_explained": 0.9771254733204842, "cossim": 0.9856770560145378, "l2_ratio": 0.9464036673307419, "relative_reconstruction_bias": 0.9773441404104233, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0675582885742188, "loss_zero": 9.41981041431427, "frac_recovered": 0.9877439066767693, "frac_alive": 0.65985107421875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b75481d897553804880d98cf9f7a5c91734198ee --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a2518ab07b7e838c367313557ee4e80e503934c23b97b703ac8fb6e3b73b479 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..461f04e098aec3ddfdcbbf4d7b7537b44b94831b --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c4a678e67c548ee3c3102c9de1174bfac3114c4e --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.363228440284729, "l1_loss": 50.97706913948059, "l0": 656.342887878418, "frac_variance_explained": 0.971705749630928, "cossim": 0.9820158407092094, "l2_ratio": 0.9374741353094578, "relative_reconstruction_bias": 0.9745247662067413, "loss_original": 2.988885059952736, "loss_reconstructed": 3.095957577228546, "loss_zero": 9.41981041431427, "frac_recovered": 0.9833251647651196, "frac_alive": 0.65740966796875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..438924faaffc3a96cd8e611d9804dc108b819eb1 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:239de60a10fa562f5d2012ad482a7386cffb7f5e09b42cb6222afde0fd33c9be +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5b1073f3be8a68bc3891befd67c74fa9633382e3 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c344b94d6faca358ef412f64e07c603f52b79f2b --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.7178614288568497, "l1_loss": 43.7660813331604, "l0": 471.93741607666016, "frac_variance_explained": 0.9619039408862591, "cossim": 0.9760403409600258, "l2_ratio": 0.924134623259306, "relative_reconstruction_bias": 0.9698718897998333, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1424050331115723, "loss_zero": 9.41981041431427, "frac_recovered": 0.9760990291833878, "frac_alive": 0.6507568359375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f31b35e0f5e538d0ec0fc9c4b4c0982345c6c0c --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826b28d6e0e275ef3c1e1a34ff8b11dd035b1b2d3cd7097afc3ab798bff7b0d0 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cc924c906e5e92f6bc5f1fb0b08693e9cab76e9b --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..42822b90e7dc9017b060d2fe04d60735bbcdd617 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.258958250284195, "l1_loss": 34.640897035598755, "l0": 280.277286529541, "frac_variance_explained": 0.9457531869411469, "cossim": 0.9651481173932552, "l2_ratio": 0.9025088399648666, "relative_reconstruction_bias": 0.9640039317309856, "loss_original": 2.988885059952736, "loss_reconstructed": 3.245699033141136, "loss_zero": 9.41981041431427, "frac_recovered": 0.9601433612406254, "frac_alive": 0.62640380859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8433756d8c5b75d185acd265dce5fbbbd5d1eed2 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0e4a110f8a5d38254afc3fd71272f2de44b6cdb3c62c48f617484400d56d35 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f1eba892106a011dd86140e1aaab3af0eb21a618 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f72bf2829fdc6f6a34d065ab7d95a27cff222af6 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.6554054021835327, "l1_loss": 29.19662058353424, "l0": 189.00453758239746, "frac_variance_explained": 0.9309603720903397, "cossim": 0.9557722173631191, "l2_ratio": 0.8842405490577221, "relative_reconstruction_bias": 0.9582779221236706, "loss_original": 2.988885059952736, "loss_reconstructed": 3.3518753051757812, "loss_zero": 9.41981041431427, "frac_recovered": 0.9437170922756195, "frac_alive": 0.59381103515625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3c11de976c60b9ccea66a225fd39c3a563fdfc7 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d83ea861915d3fe7f775a11094991355ed215c4f45f95b5b04d0fcb5240ef4af +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..75f6f6aa6780bb222537fe15a620ec6a1a02a54a --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3f619f057a9205ecd9cbc9e8f422731e38229c5a --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.251816630363464, "l1_loss": 23.12629735469818, "l0": 105.23889970779419, "frac_variance_explained": 0.9067664258182049, "cossim": 0.9394930563867092, "l2_ratio": 0.8522851131856441, "relative_reconstruction_bias": 0.9486487060785294, "loss_original": 2.988885059952736, "loss_reconstructed": 3.5442482978105545, "loss_zero": 9.41981041431427, "frac_recovered": 0.9140450395643711, "frac_alive": 0.51416015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9c562393adf26f853888ff223176dc8f7e16632 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35ae9587f0d97450d4282d25532cf9b631637e33fb011e16f44f0e61b4e5d50 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d1d85b2c82e28907ac61cc6db0badf88b3bab227 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c115ad7ee1e592c7fae28a04e29aeeb51ec41d90 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.3384958654642105, "l1_loss": 61.856218338012695, "l0": 813.3695487976074, "frac_variance_explained": 0.9762563109397888, "cossim": 0.9862280264496803, "l2_ratio": 0.9499441534280777, "relative_reconstruction_bias": 0.9780955091118813, "loss_original": 2.988885059952736, "loss_reconstructed": 3.068489745259285, "loss_zero": 12.706798493862152, "frac_recovered": 0.9917979650199413, "frac_alive": 0.635009765625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..474582391a354ebd36f82b7fc4acdfaeb887f126 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fbc6dd4d8dd9cf71c90a12858631d973fcff8fb25dc3fc73901ed1a38e482d9 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b2a0732ce36847940b52ec37012308fd3dce739 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..898b556b29e2971186f81c55bc0a0c781122a4ba --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.6318285167217255, "l1_loss": 55.182172775268555, "l0": 633.0201950073242, "frac_variance_explained": 0.9701960906386375, "cossim": 0.9824677594006062, "l2_ratio": 0.9417072683572769, "relative_reconstruction_bias": 0.9755277708172798, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0936404317617416, "loss_zero": 12.706798493862152, "frac_recovered": 0.9892093501985073, "frac_alive": 0.63555908203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d359fd49dae5b2dd003fa585e35667b34b869597 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82efd999d39786c3756acd149db051df475b29e5a7288790182c52954a4f1b76 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ca277b898a55e230ba542b833f603efa4560063 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f9a092c99e2ed0cd0039acb982883448329d030d --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.046293765306473, "l1_loss": 46.7968225479126, "l0": 437.7641773223877, "frac_variance_explained": 0.9599426276981831, "cossim": 0.9763277806341648, "l2_ratio": 0.9297848306596279, "relative_reconstruction_bias": 0.9719068892300129, "loss_original": 2.988885059952736, "loss_reconstructed": 3.139363631606102, "loss_zero": 12.706798493862152, "frac_recovered": 0.984502125531435, "frac_alive": 0.63140869140625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cba5c462ab4eae047017c61227b9e0e02f5b02d --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:530c4d2062d37c69ed2bf6aa03a308d402f53f3a0f5c97e702a51a78c37de707 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6aabffb2ecc08dd219c8c0934d122af8fa2c1e57 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..957513bcabc257b6234bed963c46192fae9aed1e --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.6727802455425262, "l1_loss": 36.08820199966431, "l0": 237.56428050994873, "frac_variance_explained": 0.9422685727477074, "cossim": 0.9651253446936607, "l2_ratio": 0.9104792773723602, "relative_reconstruction_bias": 0.96690334379673, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2347768396139145, "loss_zero": 12.706798493862152, "frac_recovered": 0.9746854156255722, "frac_alive": 0.615966796875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff089f2c22562a454249dc04a731284a4afbed5e --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b1f638f1acc644335b44e8cef6dda0eea065c96b3ea3821d1bf2ed9d173898c +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d699d7ba090ad6588a97bf4f88247da75aa117cf --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f8dfedffe5019644f6efdb38c3776fc2c2bbf82d --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.119953870773315, "l1_loss": 29.75027644634247, "l0": 147.1018772125244, "frac_variance_explained": 0.9249130897223949, "cossim": 0.9557054415345192, "l2_ratio": 0.8947230316698551, "relative_reconstruction_bias": 0.9617413580417633, "loss_original": 2.988885059952736, "loss_reconstructed": 3.329866886138916, "loss_zero": 12.706798493862152, "frac_recovered": 0.9649113863706589, "frac_alive": 0.5899658203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d37710bdd4788087d765f971b349706550eb6b0 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a4e3d1192805585ad5f73040e6e84c182a6194bd22a839f3e146eb758c2eb8 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e95e89bc6940c220c402f334fe0e70c82fc210c3 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3fe2368e598d7221238690bef1dc923a5fecd000 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.757508963346481, "l1_loss": 23.0547913312912, "l0": 74.11862897872925, "frac_variance_explained": 0.9014989510178566, "cossim": 0.940273642539978, "l2_ratio": 0.8682818002998829, "relative_reconstruction_bias": 0.9545568190515041, "loss_original": 2.988885059952736, "loss_reconstructed": 3.509969547390938, "loss_zero": 12.706798493862152, "frac_recovered": 0.946451760828495, "frac_alive": 0.4993896484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..36c43e60a8a4e8d1a26133fd48e33c8f1c4bd5de --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a5746fd6f47c259540b45c1134e6dc91b7e2deba813cdad85ccf12f2542ffb +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..02ffd5c83e821a59e6b88489975d47b36e03ab75 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..80c459f1eba48cca3bd34905d9b0b36ccbffa104 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.481657952070236, "l1_loss": 78.372483253479, "l0": 1104.934959411621, "frac_variance_explained": 0.9696078486740589, "cossim": 0.9875891730189323, "l2_ratio": 0.9511074610054493, "relative_reconstruction_bias": 0.9710834212601185, "loss_original": 2.988885059952736, "loss_reconstructed": 3.037155047059059, "loss_zero": 8.795905828475952, "frac_recovered": 0.9916625134646893, "frac_alive": 0.677734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a152caabca5440fb5856b16020a371076f0ff6b2 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88ca9a580ce21047d51fdee3d456f63fa253abfcd9a1b6cc4d4237addba8cb3 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6627306dc04eff6569d49a4e6a33c02a82722c88 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b79bbcf97bee9888790ee9b38744dc91f6d457ca --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.8078664541244507, "l1_loss": 70.37771654129028, "l0": 887.7868385314941, "frac_variance_explained": 0.9610760435461998, "cossim": 0.984023742377758, "l2_ratio": 0.9428759440779686, "relative_reconstruction_bias": 0.9672905653715134, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0519212037324905, "loss_zero": 8.795905828475952, "frac_recovered": 0.9891255982220173, "frac_alive": 0.6827392578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fc312c0de9123c410b0dcf1bc153623e178e4f1 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f401113ff70b7faa53a31917269edf8886e8f87fa38562a93bfafc2d06c028c6 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..40b978f0cec5d50d0dedcf362c38fe8ec855454e --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1bb012f32c1b5854b8b5cd608a99e2e95d6a86f8 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.271893948316574, "l1_loss": 60.19800424575806, "l0": 636.9362487792969, "frac_variance_explained": 0.9472561627626419, "cossim": 0.9781175889074802, "l2_ratio": 0.9306827336549759, "relative_reconstruction_bias": 0.9621085040271282, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0828556567430496, "loss_zero": 8.795905828475952, "frac_recovered": 0.9838020391762257, "frac_alive": 0.68707275390625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..17f8dbc1ddc2fb2e3094c46d767a4444293cdcdd --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55666fc0950359adad764fe1269b7b221501f42e09940a1a436225cd9fc99bb4 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..77be15b0cdd74bbff2a3885a2408ec1cd2f03e0a --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..06435ceaaf406643ebbfe4c2ee35c66df2a105e9 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.014227330684662, "l1_loss": 46.28994297981262, "l0": 357.406400680542, "frac_variance_explained": 0.9202631525695324, "cossim": 0.9665162414312363, "l2_ratio": 0.9105423055589199, "relative_reconstruction_bias": 0.9543868005275726, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1418560594320297, "loss_zero": 8.795905828475952, "frac_recovered": 0.9736704602837563, "frac_alive": 0.68572998046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e735e925c1cb8f4816d4240bfde694d2c7d9d316 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcbdaa0b014fefd55def0561a14e9eb85bc0e84781ac75c1dca91ec17545aa58 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0002c9c43261f3b5495097c06de221b0e0b87598 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..441f09f4cd7d2cfacb19e4756f42586a47cde097 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.576290339231491, "l1_loss": 37.41896200180054, "l0": 218.51405429840088, "frac_variance_explained": 0.8959895148873329, "cossim": 0.9559022039175034, "l2_ratio": 0.8947892338037491, "relative_reconstruction_bias": 0.9493623934686184, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2071598172187805, "loss_zero": 8.795905828475952, "frac_recovered": 0.9625203423202038, "frac_alive": 0.67864990234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5d93cc024d2fb836ace8d9d68a9dc5ed243e360 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c05e43246aaf7a6ef220bf73be9670783cc50817ea7003caf1a81f6d960da65 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7e948497193d52ef798882d8303aac574643679 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2dadd0c989971254f8e6ab60336f5fffe409b1a8 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.349083185195923, "l1_loss": 27.35872220993042, "l0": 100.07988166809082, "frac_variance_explained": 0.8574326634407043, "cossim": 0.9386947937309742, "l2_ratio": 0.870007298886776, "relative_reconstruction_bias": 0.9416053779423237, "loss_original": 2.988885059952736, "loss_reconstructed": 3.3274282813072205, "loss_zero": 8.795905828475952, "frac_recovered": 0.9419161006808281, "frac_alive": 0.639892578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3300f202cd14011aca4536b56a6a5ddc49f5479 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2e64886ee4a333d1e36ee4c3348e75ef27ac09c66dbc83473c06e61b9979fc +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3df4a9968d2aefec99d3490e07cbb45dafacff5d --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..97c166f99d5951892750965368bd565d4b2d11cd --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.185102969408035, "l1_loss": 57.67960834503174, "l0": 192.87819480895996, "frac_variance_explained": 0.9099478796124458, "cossim": 0.9943223632872105, "l2_ratio": 0.990757942199707, "relative_reconstruction_bias": 0.9958789199590683, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4081849455833435, "loss_zero": 8.736501514911652, "frac_recovered": 0.9268981963396072, "frac_alive": 0.24481201171875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..76debfad330d96b8fad966aff7005df4775a7549 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52408787d26db5c95f47a8b4605e6102560a6e3491d78bedf61f0d1bace50f3c +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aaff800c9a3573749e5d9f9d258d89c83210070f --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0e4136a52516e3b764efd765768f7a8ed7df2a24 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.883693635463715, "l1_loss": 47.91144323348999, "l0": 125.85590314865112, "frac_variance_explained": 0.8912591375410557, "cossim": 0.9931078404188156, "l2_ratio": 0.9898203052580357, "relative_reconstruction_bias": 0.9955843202769756, "loss_original": 2.988885059952736, "loss_reconstructed": 3.532444253563881, "loss_zero": 8.736501514911652, "frac_recovered": 0.9052783325314522, "frac_alive": 0.2255859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5db4161a8edd411d06e46e121915d3828fb00748 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a61fea027d5734d5448f63439a97dde688ac473070298a54c8db139a06369d +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6ec3c198ae488a1023ac85eed04edf4c9a658c18 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..23d727edea0c7cd83059b286c3e439ba90e7d8cd --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 8.733239889144897, "l1_loss": 38.12946534156799, "l0": 70.68100261688232, "frac_variance_explained": 0.8665555566549301, "cossim": 0.9914139024913311, "l2_ratio": 0.9890775866806507, "relative_reconstruction_bias": 0.9954653643071651, "loss_original": 2.988885059952736, "loss_reconstructed": 3.7251793295145035, "loss_zero": 8.736501514911652, "frac_recovered": 0.8717617243528366, "frac_alive": 0.1943359375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2f540f1abd04117fd6947ab0d32854335f71d0c --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e66f519bcd009da3aca19788083b75aa140634739ee16a969a2deda98bfe3a9 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..296f3a164d1d64f8d96c01608e3d632f089179e2 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f553bb4740c2e8649634ec6900793b89a69ad90a --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 9.87074327468872, "l1_loss": 28.784316301345825, "l0": 31.87483811378479, "frac_variance_explained": 0.8290930390357971, "cossim": 0.9890368469059467, "l2_ratio": 0.9880581386387348, "relative_reconstruction_bias": 0.9947948008775711, "loss_original": 2.988885059952736, "loss_reconstructed": 4.105308428406715, "loss_zero": 8.736501514911652, "frac_recovered": 0.8056857064366341, "frac_alive": 0.13836669921875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a29ae809a30b1941c8550cb794eb1071328f49b6 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a99cf17a74181fc0b966d64b7f405375fde6dee80b9609f6d1e039217c3c706 +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..142424996cad054bda1c177ce85c8c1e0c58cf8a --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..58bac7ac66fab3d13be56d3d271666bbd9a5bd37 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 10.871092021465302, "l1_loss": 23.7094464302063, "l0": 18.096278429031372, "frac_variance_explained": 0.7921247892081738, "cossim": 0.986845538020134, "l2_ratio": 0.9873640909790993, "relative_reconstruction_bias": 0.994219932705164, "loss_original": 2.988885059952736, "loss_reconstructed": 4.579065203666687, "loss_zero": 8.736501514911652, "frac_recovered": 0.7233187854290009, "frac_alive": 0.0751953125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/ae.pt b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e7494fe518ab41047092e8ca62fdfb6ff3441ae --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39a2eac6cba119eccd5972799867b1d7ec2732f63d77a2cb71e8d3631d18128d +size 67178280 diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/config.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a94fcabd12c22b51deb33264ef21dd90c4578325 --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "sparsity_warmup_steps": 5000, + "steps": 24414, + "decay_start": 19531, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/eval_results.json b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bc0e7806ce9736f397331d6d5c8397a5924b59ea --- /dev/null +++ b/._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 12.426673173904419, "l1_loss": 17.739373326301575, "l0": 8.751068413257599, "frac_variance_explained": 0.7290268205106258, "cossim": 0.98327811434865, "l2_ratio": 0.986289095133543, "relative_reconstruction_bias": 0.9929703958332539, "loss_original": 2.988885059952736, "loss_reconstructed": 5.607620090246201, "loss_zero": 8.736501514911652, "frac_recovered": 0.5447485893964767, "frac_alive": 0.02435302734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file