nshan144 committed on
Commit
93194aa
·
verified ·
1 Parent(s): 7abb4ef

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt +3 -0
  2. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json +30 -0
  3. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json +1 -0
  4. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt +3 -0
  5. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json +30 -0
  6. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json +1 -0
  7. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt +3 -0
  8. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json +30 -0
  9. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json +1 -0
  10. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt +3 -0
  11. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json +30 -0
  12. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json +1 -0
  13. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt +3 -0
  14. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json +30 -0
  15. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json +1 -0
  16. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt +3 -0
  17. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json +30 -0
  18. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json +1 -0
  19. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt +3 -0
  20. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json +30 -0
  21. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json +1 -0
  22. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt +3 -0
  23. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json +30 -0
  24. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json +1 -0
  25. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt +3 -0
  26. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json +30 -0
  27. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json +1 -0
  28. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt +3 -0
  29. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json +30 -0
  30. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json +1 -0
  31. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt +3 -0
  32. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json +30 -0
  33. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json +1 -0
  34. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt +3 -0
  35. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json +30 -0
  36. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json +1 -0
  37. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt +3 -0
  38. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json +30 -0
  39. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json +1 -0
  40. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt +3 -0
  41. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json +30 -0
  42. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json +1 -0
  43. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt +3 -0
  44. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json +30 -0
  45. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json +1 -0
  46. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt +3 -0
  47. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json +30 -0
  48. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json +1 -0
  49. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt +3 -0
  50. ._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json +30 -0
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a53998d60d76090fb5f52a4b8f7f9e5993c4747484a0ebf9ca85368df9fbc0
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 0.8880099132657051, "l1_loss": 24.71984565258026, "l0": 289.50769233703613, "frac_variance_explained": 0.9763443879783154, "cossim": 0.992082379758358, "l2_ratio": 0.9707153402268887, "relative_reconstruction_bias": 0.9786949455738068, "loss_original": 2.988885059952736, "loss_reconstructed": 3.016266644001007, "loss_zero": 8.828418850898743, "frac_recovered": 0.9953007213771343, "frac_alive": 0.7119140625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b9f5bcb3f911be5660637af45cfb788366b3a921fd989b4adec1c90dca78e69
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.015,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 0.9838254600763321, "l1_loss": 22.220465064048767, "l0": 232.73355770111084, "frac_variance_explained": 0.9710042513906956, "cossim": 0.9902598187327385, "l2_ratio": 0.9662889577448368, "relative_reconstruction_bias": 0.9760206341743469, "loss_original": 2.988885059952736, "loss_reconstructed": 3.024588406085968, "loss_zero": 8.828418850898743, "frac_recovered": 0.9938735403120518, "frac_alive": 0.7171630859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46421d60d7e5e3802d52e75af8f4392e5ad4cd6b0c25058dd11dbaf36ca4cb51
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.02,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.1100959181785583, "l1_loss": 19.30671751499176, "l0": 169.3354959487915, "frac_variance_explained": 0.9629898369312286, "cossim": 0.9875496290624142, "l2_ratio": 0.9594511836767197, "relative_reconstruction_bias": 0.971693217754364, "loss_original": 2.988885059952736, "loss_reconstructed": 3.035519316792488, "loss_zero": 8.828418850898743, "frac_recovered": 0.9920109137892723, "frac_alive": 0.7247314453125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aedba6b88aece44504d5278055fc7095c799878107f9b1f3e7e1284655e9b7f
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.2951818630099297, "l1_loss": 15.69888824224472, "l0": 103.05662822723389, "frac_variance_explained": 0.9493058025836945, "cossim": 0.9829495437443256, "l2_ratio": 0.9483063369989395, "relative_reconstruction_bias": 0.9647174514830112, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0596881806850433, "loss_zero": 8.828418850898743, "frac_recovered": 0.9878532290458679, "frac_alive": 0.73175048828125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ddf297c7acecace4c111bda5f154add817584cb99367df5d1555cbabd344ed3
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.4320023581385612, "l1_loss": 13.441674709320068, "l0": 70.04790019989014, "frac_variance_explained": 0.9376466050744057, "cossim": 0.9790421277284622, "l2_ratio": 0.9389512911438942, "relative_reconstruction_bias": 0.9588979072868824, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0810854732990265, "loss_zero": 8.828418850898743, "frac_recovered": 0.9842088855803013, "frac_alive": 0.736328125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:262ad703e05eff4dcbcb23e0d0c8f699830b629590e3911ff7c481b6a727c3de
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.6484556645154953, "l1_loss": 10.752245247364044, "l0": 39.29365658760071, "frac_variance_explained": 0.9167096689343452, "cossim": 0.9719779714941978, "l2_ratio": 0.9227463938295841, "relative_reconstruction_bias": 0.9491799250245094, "loss_original": 2.988885059952736, "loss_reconstructed": 3.131672754883766, "loss_zero": 8.828418850898743, "frac_recovered": 0.9755509421229362, "frac_alive": 0.73443603515625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b08da4344dfcf1007773109a0e3c6940a53cb334bb16dbd0eedf3e8807addae
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.3996912688016891, "l1_loss": 42.294498443603516, "l0": 848.6530647277832, "frac_variance_explained": 0.9665217883884907, "cossim": 0.9879145994782448, "l2_ratio": 0.9566630609333515, "relative_reconstruction_bias": 0.970756758004427, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0269704312086105, "loss_zero": 8.97075641155243, "frac_recovered": 0.9936139024794102, "frac_alive": 0.69598388671875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08dabf4afc722af480fa50ea4c71aeea6a686b413f88b8daa0cd6e6c54497526
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.015,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.5650566518306732, "l1_loss": 37.678173542022705, "l0": 667.7053680419922, "frac_variance_explained": 0.9581456147134304, "cossim": 0.9848296083509922, "l2_ratio": 0.9501992017030716, "relative_reconstruction_bias": 0.9674350023269653, "loss_original": 2.988885059952736, "loss_reconstructed": 3.038178339600563, "loss_zero": 8.97075641155243, "frac_recovered": 0.9917236343026161, "frac_alive": 0.705078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4f6e0256082fd3383b69641e8c1bbdef418eeafdc2ac7e3a8aec772d945261
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.02,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.7955679446458817, "l1_loss": 31.955636978149414, "l0": 463.37560272216797, "frac_variance_explained": 0.9448460824787617, "cossim": 0.9799039028584957, "l2_ratio": 0.9401619769632816, "relative_reconstruction_bias": 0.9621202535927296, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0586147010326385, "loss_zero": 8.97075641155243, "frac_recovered": 0.9883030280470848, "frac_alive": 0.71124267578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0600706d266241f97018ce9cc759dce3a5522e6158ceef7295b6f21093855ab9
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.1483198404312134, "l1_loss": 24.6392685174942, "l0": 251.70951080322266, "frac_variance_explained": 0.9206176958978176, "cossim": 0.9708879068493843, "l2_ratio": 0.9243263490498066, "relative_reconstruction_bias": 0.9547557085752487, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0947447419166565, "loss_zero": 8.97075641155243, "frac_recovered": 0.9822593331336975, "frac_alive": 0.7169189453125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03813180d194a02464de0c2e3ca2a94516a58d766df99d15bc9bf20ae5d87519
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.406396448612213, "l1_loss": 20.208199858665466, "l0": 153.5412302017212, "frac_variance_explained": 0.9000130482017994, "cossim": 0.9631378278136253, "l2_ratio": 0.9116865955293179, "relative_reconstruction_bias": 0.9493150822818279, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1343201994895935, "loss_zero": 8.97075641155243, "frac_recovered": 0.9756543561816216, "frac_alive": 0.71405029296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b80de1f4b215560289634e7a53b3ba1cadeb76cd53f4b9ed75be1d808afcc31
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.771856963634491, "l1_loss": 15.305355727672577, "l0": 73.31960344314575, "frac_variance_explained": 0.8663658648729324, "cossim": 0.9504912123084068, "l2_ratio": 0.8903653882443905, "relative_reconstruction_bias": 0.9395905695855618, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2098177671432495, "loss_zero": 8.97075641155243, "frac_recovered": 0.9631249755620956, "frac_alive": 0.68414306640625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9696848df3866e7f6ede6a8f00e462144c0277b9554abf81ac1810b7e2273ea7
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.112189382314682, "l1_loss": 56.772852659225464, "l0": 828.3504066467285, "frac_variance_explained": 0.9771254733204842, "cossim": 0.9856770560145378, "l2_ratio": 0.9464036673307419, "relative_reconstruction_bias": 0.9773441404104233, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0675582885742188, "loss_zero": 9.41981041431427, "frac_recovered": 0.9877439066767693, "frac_alive": 0.65985107421875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a2518ab07b7e838c367313557ee4e80e503934c23b97b703ac8fb6e3b73b479
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.015,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.363228440284729, "l1_loss": 50.97706913948059, "l0": 656.342887878418, "frac_variance_explained": 0.971705749630928, "cossim": 0.9820158407092094, "l2_ratio": 0.9374741353094578, "relative_reconstruction_bias": 0.9745247662067413, "loss_original": 2.988885059952736, "loss_reconstructed": 3.095957577228546, "loss_zero": 9.41981041431427, "frac_recovered": 0.9833251647651196, "frac_alive": 0.65740966796875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:239de60a10fa562f5d2012ad482a7386cffb7f5e09b42cb6222afde0fd33c9be
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.02,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.7178614288568497, "l1_loss": 43.7660813331604, "l0": 471.93741607666016, "frac_variance_explained": 0.9619039408862591, "cossim": 0.9760403409600258, "l2_ratio": 0.924134623259306, "relative_reconstruction_bias": 0.9698718897998333, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1424050331115723, "loss_zero": 9.41981041431427, "frac_recovered": 0.9760990291833878, "frac_alive": 0.6507568359375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:826b28d6e0e275ef3c1e1a34ff8b11dd035b1b2d3cd7097afc3ab798bff7b0d0
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.258958250284195, "l1_loss": 34.640897035598755, "l0": 280.277286529541, "frac_variance_explained": 0.9457531869411469, "cossim": 0.9651481173932552, "l2_ratio": 0.9025088399648666, "relative_reconstruction_bias": 0.9640039317309856, "loss_original": 2.988885059952736, "loss_reconstructed": 3.245699033141136, "loss_zero": 9.41981041431427, "frac_recovered": 0.9601433612406254, "frac_alive": 0.62640380859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc0e4a110f8a5d38254afc3fd71272f2de44b6cdb3c62c48f617484400d56d35
3
+ size 67178280
._perlayer1_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 5000,
12
+ "steps": 24414,
13
+ "decay_start": 19531,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }