canrager commited on
Commit
3d81726
·
verified ·
1 Parent(s): 2756bc6

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +26 -0
  2. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json +1 -0
  3. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json +26 -0
  4. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json +1 -0
  5. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json +26 -0
  6. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json +1 -0
  7. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/config.json +26 -0
  8. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/eval_results.json +1 -0
  9. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json +26 -0
  10. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json +1 -0
  11. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json +26 -0
  12. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json +1 -0
  13. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +26 -0
  14. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json +1 -0
  15. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +26 -0
  16. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json +1 -0
  17. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json +26 -0
  18. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json +1 -0
  19. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json +26 -0
  20. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json +1 -0
  21. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/config.json +26 -0
  22. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/eval_results.json +1 -0
  23. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json +26 -0
  24. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json +1 -0
  25. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json +26 -0
  26. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json +1 -0
  27. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +26 -0
  28. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json +1 -0
  29. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +26 -0
  30. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json +1 -0
  31. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/config.json +26 -0
  32. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json +1 -0
  33. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/config.json +26 -0
  34. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json +1 -0
  35. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/config.json +26 -0
  36. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/eval_results.json +1 -0
  37. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/config.json +26 -0
  38. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json +1 -0
  39. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/config.json +26 -0
  40. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json +1 -0
  41. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json +26 -0
  42. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json +1 -0
  43. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json +26 -0
  44. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json +1 -0
  45. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/config.json +26 -0
  46. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json +1 -0
  47. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/config.json +26 -0
  48. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json +1 -0
  49. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/config.json +26 -0
  50. gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/eval_results.json +1 -0
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "0",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 20,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 137.7, "l1_loss": 110.15, "l0": 20.0, "frac_variance_explained": 0.06328125, "cossim": 0.2876953125, "l2_ratio": 0.1814453125, "relative_reconstruction_bias": 0.630078125, "loss_original": 2.440642213821411, "loss_reconstructed": 11.407014656066895, "loss_zero": 12.452932643890382, "frac_recovered": 0.10465058535337449, "frac_alive": 0.138454869389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "154",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 20,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 93.55, "l1_loss": 238.6, "l0": 20.0, "frac_variance_explained": 0.334375, "cossim": 0.755078125, "l2_ratio": 0.79140625, "relative_reconstruction_bias": 1.01484375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.918577527999878, "loss_zero": 12.452932643890382, "frac_recovered": 0.7525505006313324, "frac_alive": 0.1292317658662796, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "1544",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 20,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 70.4, "l1_loss": 267.2, "l0": 20.0, "frac_variance_explained": 0.633984375, "cossim": 0.871484375, "l2_ratio": 0.87109375, "relative_reconstruction_bias": 1.0, "loss_original": 2.440642213821411, "loss_reconstructed": 3.1963499784469604, "loss_zero": 12.452932643890382, "frac_recovered": 0.9245945453643799, "frac_alive": 0.1436631977558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "15440",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 20,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 65.425, "l1_loss": 311.0, "l0": 20.0, "frac_variance_explained": 0.758984375, "cossim": 0.8875, "l2_ratio": 0.890234375, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7933643102645873, "loss_zero": 12.452932643890382, "frac_recovered": 0.9648277342319489, "frac_alive": 0.157009556889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "48",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 20,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 105.15, "l1_loss": 179.8, "l0": 20.0, "frac_variance_explained": 0.18828125, "cossim": 0.692578125, "l2_ratio": 0.716015625, "relative_reconstruction_bias": 0.9984375, "loss_original": 2.440642213821411, "loss_reconstructed": 6.1452779293060305, "loss_zero": 12.452932643890382, "frac_recovered": 0.6302249014377594, "frac_alive": 0.1695421040058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "488",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 20,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 73.85, "l1_loss": 262.3, "l0": 20.0, "frac_variance_explained": 0.6015625, "cossim": 0.857421875, "l2_ratio": 0.85625, "relative_reconstruction_bias": 0.996875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.600706672668457, "loss_zero": 12.452932643890382, "frac_recovered": 0.884125429391861, "frac_alive": 0.131618931889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "4882",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 20,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 66.1, "l1_loss": 277.0, "l0": 20.0, "frac_variance_explained": 0.680859375, "cossim": 0.8859375, "l2_ratio": 0.888671875, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.888194966316223, "loss_zero": 12.452932643890382, "frac_recovered": 0.9553665339946746, "frac_alive": 0.1568467915058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "0",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 40,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 142.2, "l1_loss": 218.0, "l0": 40.0, "frac_variance_explained": 0.10546875, "cossim": 0.36953125, "l2_ratio": 0.2482421875, "relative_reconstruction_bias": 0.669921875, "loss_original": 2.440642213821411, "loss_reconstructed": 10.688310146331787, "loss_zero": 12.452932643890382, "frac_recovered": 0.1763722062110901, "frac_alive": 0.212185338139534, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "154",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 40,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 87.2, "l1_loss": 379.6, "l0": 40.0, "frac_variance_explained": 0.449609375, "cossim": 0.7953125, "l2_ratio": 0.8234375, "relative_reconstruction_bias": 1.015625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.139649343490601, "loss_zero": 12.452932643890382, "frac_recovered": 0.830296915769577, "frac_alive": 0.2669813334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "1544",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 40,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 65.55, "l1_loss": 343.8, "l0": 40.0, "frac_variance_explained": 0.665625, "cossim": 0.88984375, "l2_ratio": 0.888671875, "relative_reconstruction_bias": 0.998046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.863617014884949, "loss_zero": 12.452932643890382, "frac_recovered": 0.9578355431556702, "frac_alive": 0.2782118022441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "15440",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 40,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 58.45, "l1_loss": 394.2, "l0": 40.0, "frac_variance_explained": 0.780078125, "cossim": 0.909375, "l2_ratio": 0.911328125, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.612251806259155, "loss_zero": 12.452932643890382, "frac_recovered": 0.9829114198684692, "frac_alive": 0.29541015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "48",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 40,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 108.0, "l1_loss": 338.6, "l0": 40.0, "frac_variance_explained": 0.305078125, "cossim": 0.733203125, "l2_ratio": 0.73984375, "relative_reconstruction_bias": 0.8953125, "loss_original": 2.440642213821411, "loss_reconstructed": 5.098966073989868, "loss_zero": 12.452932643890382, "frac_recovered": 0.7346842169761658, "frac_alive": 0.33349609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "488",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 40,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 69.6, "l1_loss": 384.6, "l0": 40.0, "frac_variance_explained": 0.665625, "cossim": 0.872265625, "l2_ratio": 0.8796875, "relative_reconstruction_bias": 1.0078125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.157698321342468, "loss_zero": 12.452932643890382, "frac_recovered": 0.9283924698829651, "frac_alive": 0.2635633647441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "4882",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 40,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 61.075, "l1_loss": 350.0, "l0": 40.0, "frac_variance_explained": 0.703125, "cossim": 0.901171875, "l2_ratio": 0.901953125, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.661836934089661, "loss_zero": 12.452932643890382, "frac_recovered": 0.9779623448848724, "frac_alive": 0.2948133647441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "0",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 80,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 133.8, "l1_loss": 406.6, "l0": 80.0, "frac_variance_explained": 0.173828125, "cossim": 0.4671875, "l2_ratio": 0.3470703125, "relative_reconstruction_bias": 0.741796875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.577101516723634, "loss_zero": 12.452932643890382, "frac_recovered": 0.2874674767255783, "frac_alive": 0.314019113779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "154",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 80,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 78.3, "l1_loss": 554.4, "l0": 80.0, "frac_variance_explained": 0.56953125, "cossim": 0.839453125, "l2_ratio": 0.851953125, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.426113796234131, "loss_zero": 12.452932643890382, "frac_recovered": 0.9015444159507752, "frac_alive": 0.5124782919883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "1544",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 80,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 61.05, "l1_loss": 524.6, "l0": 80.0, "frac_variance_explained": 0.731640625, "cossim": 0.9046875, "l2_ratio": 0.9078125, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.71366970539093, "loss_zero": 12.452932643890382, "frac_recovered": 0.9728043735027313, "frac_alive": 0.49365234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "15440",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 80,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.2, "l1_loss": 536.8, "l0": 80.0, "frac_variance_explained": 0.803125, "cossim": 0.925390625, "l2_ratio": 0.926171875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.552223062515259, "loss_zero": 12.452932643890382, "frac_recovered": 0.9888993203639984, "frac_alive": 0.4521484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "48",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 80,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 91.75, "l1_loss": 515.4, "l0": 80.0, "frac_variance_explained": 0.414453125, "cossim": 0.773828125, "l2_ratio": 0.78984375, "relative_reconstruction_bias": 0.96953125, "loss_original": 2.440642213821411, "loss_reconstructed": 4.03226523399353, "loss_zero": 12.452932643890382, "frac_recovered": 0.841141802072525, "frac_alive": 0.5571831464767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "488",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 80,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 69.125, "l1_loss": 497.8, "l0": 80.0, "frac_variance_explained": 0.642578125, "cossim": 0.873046875, "l2_ratio": 0.87734375, "relative_reconstruction_bias": 1.004296875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.0880523681640626, "loss_zero": 12.452932643890382, "frac_recovered": 0.9353146016597748, "frac_alive": 0.5100911259651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "4882",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 80,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.45, "l1_loss": 593.0, "l0": 80.0, "frac_variance_explained": 0.847265625, "cossim": 0.921875, "l2_ratio": 0.923828125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5707311153411867, "loss_zero": 12.452932643890382, "frac_recovered": 0.9870538175106048, "frac_alive": 0.4774305522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "0",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 160,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 121.8, "l1_loss": 744.8, "l0": 160.0, "frac_variance_explained": 0.2640625, "cossim": 0.56953125, "l2_ratio": 0.4927734375, "relative_reconstruction_bias": 0.8609375, "loss_original": 2.440642213821411, "loss_reconstructed": 6.267248868942261, "loss_zero": 12.452932643890382, "frac_recovered": 0.6180142462253571, "frac_alive": 0.4386935830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "154",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 160,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 69.05, "l1_loss": 742.0, "l0": 160.0, "frac_variance_explained": 0.65234375, "cossim": 0.875390625, "l2_ratio": 0.885546875, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.9694986820220945, "loss_zero": 12.452932643890382, "frac_recovered": 0.9471549808979034, "frac_alive": 0.7988823652267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "1544",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 160,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 59.575, "l1_loss": 769.6, "l0": 160.0, "frac_variance_explained": 0.788671875, "cossim": 0.91484375, "l2_ratio": 0.916796875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.621287798881531, "loss_zero": 12.452932643890382, "frac_recovered": 0.9820147037506104, "frac_alive": 0.76953125, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TrainerTopK",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0001885618083164127,
6
+ "steps": "15440",
7
+ "seed": 0,
8
+ "activation_dim": 2304,
9
+ "dict_size": 18432,
10
+ "k": 160,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 48.3, "l1_loss": 731.6, "l0": 160.0, "frac_variance_explained": 0.833203125, "cossim": 0.93984375, "l2_ratio": 0.9390625, "relative_reconstruction_bias": 0.9984375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.512040066719055, "loss_zero": 12.452932643890382, "frac_recovered": 0.9929002702236176, "frac_alive": 0.6171332597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}