adamkarvonen committed on
Commit
00189a8
·
verified ·
1 Parent(s): febe0e2

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff.
Files changed (50) hide show
  1. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  2. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json +32 -0
  3. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json +1 -0
  4. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  5. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json +32 -0
  6. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json +1 -0
  7. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  8. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json +32 -0
  9. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json +1 -0
  10. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  11. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json +32 -0
  12. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json +1 -0
  13. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  14. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json +32 -0
  15. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json +1 -0
  16. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  17. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json +32 -0
  18. BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json +1 -0
  19. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  20. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json +28 -0
  21. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json +1 -0
  22. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  23. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json +28 -0
  24. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json +1 -0
  25. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  26. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json +28 -0
  27. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json +1 -0
  28. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  29. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json +28 -0
  30. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json +1 -0
  31. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  32. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json +28 -0
  33. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json +1 -0
  34. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt +3 -0
  35. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json +28 -0
  36. GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json +1 -0
  37. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt +3 -0
  38. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json +29 -0
  39. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json +1 -0
  40. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt +3 -0
  41. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json +29 -0
  42. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json +1 -0
  43. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt +3 -0
  44. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json +29 -0
  45. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json +1 -0
  46. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt +3 -0
  47. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json +29 -0
  48. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json +1 -0
  49. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt +3 -0
  50. JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json +29 -0
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:731f9e5cdb09067bf8e15f162aff6438e7e79db5a9987c735e4b28b97fe7d2b7
3
+ size 100733974
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 16384,
16
+ "k": 20,
17
+ "device": "cuda:1",
18
+ "layer": 8,
19
+ "lm_name": "EleutherAI/pythia-160m-deduped",
20
+ "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_0",
21
+ "submodule_name": "resid_post_layer_8"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 768,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 32,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:1"
31
+ }
32
+ }
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 7.165702819824219, "l1_loss": 52.398841048731946, "l0": 19.910211158521246, "frac_variance_explained": 0.9256864298473705, "cossim": 0.9520920298316262, "l2_ratio": 0.9514842954548922, "relative_reconstruction_bias": 0.9993569128441088, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.9917597409450645, "loss_zero": 12.187079458525687, "frac_recovered": 0.9568525624997688, "frac_alive": 0.933837890625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da5a8f71335eba31afa1eca0d8b6f01790dec259246cbd6bc7225951a5486d9a
3
+ size 100733974
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 16384,
16
+ "k": 40,
17
+ "device": "cuda:1",
18
+ "layer": 8,
19
+ "lm_name": "EleutherAI/pythia-160m-deduped",
20
+ "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_1",
21
+ "submodule_name": "resid_post_layer_8"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 768,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 32,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:1"
31
+ }
32
+ }
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 6.358675190896699, "l1_loss": 67.62095688328598, "l0": 39.82553227742513, "frac_variance_explained": 0.9409584890712391, "cossim": 0.9623859289920691, "l2_ratio": 0.9620392033548066, "relative_reconstruction_bias": 0.9997165492086699, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.852238994656187, "loss_zero": 12.187079458525687, "frac_recovered": 0.9724247419472897, "frac_alive": 0.92108154296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe65f3e4ad7fe582508b307625716ff197a04a3d371834c7222c023169597fbd
3
+ size 100733974
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 16384,
16
+ "k": 80,
17
+ "device": "cuda:1",
18
+ "layer": 8,
19
+ "lm_name": "EleutherAI/pythia-160m-deduped",
20
+ "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_2",
21
+ "submodule_name": "resid_post_layer_8"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 768,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 32,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:1"
31
+ }
32
+ }
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 5.535941615249172, "l1_loss": 90.0740072076971, "l0": 79.61385576652758, "frac_variance_explained": 0.9549660375623992, "cossim": 0.9716276183272853, "l2_ratio": 0.9715044552629645, "relative_reconstruction_bias": 1.0009427467981975, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.7588755361961597, "loss_zero": 12.187079458525687, "frac_recovered": 0.9828645659215522, "frac_alive": 0.873291015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62d31b3f692684c53b5c5e8ff60b739baa9a0daf57abe707da33701733063e7d
3
+ size 100733974
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 16384,
16
+ "k": 160,
17
+ "device": "cuda:1",
18
+ "layer": 8,
19
+ "lm_name": "EleutherAI/pythia-160m-deduped",
20
+ "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_3",
21
+ "submodule_name": "resid_post_layer_8"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 768,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 32,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:1"
31
+ }
32
+ }
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.596404407963608, "l1_loss": 153.62816642992425, "l0": 159.1172462232185, "frac_variance_explained": 0.9690298311638109, "cossim": 0.98062437953371, "l2_ratio": 0.9803878791404493, "relative_reconstruction_bias": 1.0007607864611077, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6916814464511294, "loss_zero": 12.187079458525687, "frac_recovered": 0.9904219201116851, "frac_alive": 0.7470703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff290fc3b27d35eee2431b923928da0a5530eaf02380cc605a3f04aa3bc61287
3
+ size 100733974
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 16384,
16
+ "k": 320,
17
+ "device": "cuda:1",
18
+ "layer": 8,
19
+ "lm_name": "EleutherAI/pythia-160m-deduped",
20
+ "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_4",
21
+ "submodule_name": "resid_post_layer_8"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 768,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 32,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:1"
31
+ }
32
+ }
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.1190587029312598, "l1_loss": 295.7740866921165, "l0": 318.5566424745502, "frac_variance_explained": 0.9860216288855581, "cossim": 0.9912758794697848, "l2_ratio": 0.9912153554685188, "relative_reconstruction_bias": 1.0004844918395535, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.63657668503848, "loss_zero": 12.187079458525687, "frac_recovered": 0.9965930906209078, "frac_alive": 0.32232666015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:405bcdf303076eb4e67b6dcd34a2c041278a2574400fd19e2deb02eb9df12560
3
+ size 100733974
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 384,
13
+ "seed": 0,
14
+ "activation_dim": 768,
15
+ "dict_size": 16384,
16
+ "k": 640,
17
+ "device": "cuda:1",
18
+ "layer": 8,
19
+ "lm_name": "EleutherAI/pythia-160m-deduped",
20
+ "wandb_name": "BatchTopKTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_5",
21
+ "submodule_name": "resid_post_layer_8"
22
+ },
23
+ "buffer": {
24
+ "d_submodule": 768,
25
+ "io": "out",
26
+ "n_ctxs": 244,
27
+ "ctx_len": 1024,
28
+ "refresh_batch_size": 32,
29
+ "out_batch_size": 2048,
30
+ "device": "cuda:1"
31
+ }
32
+ }
BatchTopK_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.5527055624759558, "l1_loss": 626.5316938920455, "l0": 639.1975338097775, "frac_variance_explained": 0.9963156291932771, "cossim": 0.9977991869955352, "l2_ratio": 0.9976590293826479, "relative_reconstruction_bias": 1.0002403476021506, "loss_original": 2.6064688870401094, "loss_reconstructed": 2.6137762756058662, "loss_zero": 12.187079458525687, "frac_recovered": 0.9991628256711093, "frac_alive": 0.06146240234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b23e31ad3ee499518d7bd643be829a3ee7b86b236059a0146799d475bb1c6c
3
+ size 100865046
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 768,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 0,
13
+ "device": "cuda:0",
14
+ "layer": 8,
15
+ "lm_name": "EleutherAI/pythia-160m-deduped",
16
+ "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_12",
17
+ "submodule_name": "resid_post_layer_8"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 768,
21
+ "io": "out",
22
+ "n_ctxs": 244,
23
+ "ctx_len": 1024,
24
+ "refresh_batch_size": 32,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.449277211384601, "l1_loss": 233.9408302766731, "l0": 483.06496126105986, "frac_variance_explained": 0.9913476149001753, "cossim": 0.9946498181446489, "l2_ratio": 0.9939263398388782, "relative_reconstruction_bias": 0.9993272940796541, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6073691378156822, "loss_zero": 12.979128625019488, "frac_recovered": 0.9982530004288777, "frac_alive": 0.59857177734375, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c989b27ea8ba899f086eb1e4bbf13a21a3e8ebc86900fb5d588fd7f804b814f4
3
+ size 100865046
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 768,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.018,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 0,
13
+ "device": "cuda:0",
14
+ "layer": 8,
15
+ "lm_name": "EleutherAI/pythia-160m-deduped",
16
+ "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_13",
17
+ "submodule_name": "resid_post_layer_8"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 768,
21
+ "io": "out",
22
+ "n_ctxs": 244,
23
+ "ctx_len": 1024,
24
+ "refresh_batch_size": 32,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.718252821140979, "l1_loss": 174.70882599612315, "l0": 357.33407280244023, "frac_variance_explained": 0.9797811321465366, "cossim": 0.9874855103981064, "l2_ratio": 0.9873908984373851, "relative_reconstruction_bias": 1.0000614502343788, "loss_original": 2.591329812285412, "loss_reconstructed": 2.638556733906987, "loss_zero": 12.979128625019488, "frac_recovered": 0.9948906988264566, "frac_alive": 0.55438232421875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0818ec859a15998633c73ad5dce110641f60f0b7b999aafd459d8bcaafa407d
3
+ size 100865046
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 768,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.024,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 0,
13
+ "device": "cuda:0",
14
+ "layer": 8,
15
+ "lm_name": "EleutherAI/pythia-160m-deduped",
16
+ "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_14",
17
+ "submodule_name": "resid_post_layer_8"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 768,
21
+ "io": "out",
22
+ "n_ctxs": 244,
23
+ "ctx_len": 1024,
24
+ "refresh_batch_size": 32,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.538920862128935, "l1_loss": 141.84569384103798, "l0": 227.8396312299981, "frac_variance_explained": 0.9699712948626783, "cossim": 0.9812705624534424, "l2_ratio": 0.98167203241084, "relative_reconstruction_bias": 1.000891358737486, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6714729882148376, "loss_zero": 12.979128625019488, "frac_recovered": 0.9913170700331768, "frac_alive": 0.71710205078125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01de966bee6924eeaadb356982edc47ee4ae5d8c402495317c21dfafaf2361f7
3
+ size 100865046
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 768,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 0,
13
+ "device": "cuda:0",
14
+ "layer": 8,
15
+ "lm_name": "EleutherAI/pythia-160m-deduped",
16
+ "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_15",
17
+ "submodule_name": "resid_post_layer_8"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 768,
21
+ "io": "out",
22
+ "n_ctxs": 244,
23
+ "ctx_len": 1024,
24
+ "refresh_batch_size": 32,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 5.540974214852574, "l1_loss": 94.82435709022614, "l0": 95.3962563204478, "frac_variance_explained": 0.9552597058824746, "cossim": 0.9718739264700786, "l2_ratio": 0.9708090194736618, "relative_reconstruction_bias": 0.9997583469712591, "loss_original": 2.591329812285412, "loss_reconstructed": 2.7390204301799637, "loss_zero": 12.979128625019488, "frac_recovered": 0.9840897005724619, "frac_alive": 0.9267578125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eb06802eee053782d3d338803d4f408ce84b0a1cc281748a266cb4f772a956e
3
+ size 100865046
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 768,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 0,
13
+ "device": "cuda:0",
14
+ "layer": 8,
15
+ "lm_name": "EleutherAI/pythia-160m-deduped",
16
+ "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_16",
17
+ "submodule_name": "resid_post_layer_8"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 768,
21
+ "io": "out",
22
+ "n_ctxs": 244,
23
+ "ctx_len": 1024,
24
+ "refresh_batch_size": 32,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 6.3163925567305235, "l1_loss": 64.96194687808853, "l0": 48.62305409075266, "frac_variance_explained": 0.9414439212126904, "cossim": 0.9631719384566847, "l2_ratio": 0.9623617971517954, "relative_reconstruction_bias": 0.9998143462531538, "loss_original": 2.591329812285412, "loss_reconstructed": 2.8256030183240592, "loss_zero": 12.979128625019488, "frac_recovered": 0.9748218806393175, "frac_alive": 0.92987060546875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:492aae9f99769dba8c556b726451e369b1eeb33b1baf558a90444dbfad618fb2
3
+ size 100865046
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 768,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.08,
9
+ "warmup_steps": 1000,
10
+ "sparsity_warmup_steps": 5000,
11
+ "decay_start": 195312,
12
+ "seed": 0,
13
+ "device": "cuda:0",
14
+ "layer": 8,
15
+ "lm_name": "EleutherAI/pythia-160m-deduped",
16
+ "wandb_name": "GatedTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_17",
17
+ "submodule_name": "resid_post_layer_8"
18
+ },
19
+ "buffer": {
20
+ "d_submodule": 768,
21
+ "io": "out",
22
+ "n_ctxs": 244,
23
+ "ctx_len": 1024,
24
+ "refresh_batch_size": 32,
25
+ "out_batch_size": 2048,
26
+ "device": "cuda:0"
27
+ }
28
+ }
GatedSAE_pythia-160m-deduped__0108/resid_post_layer_8/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 6.866190913211868, "l1_loss": 55.27316311755812, "l0": 30.90061481889472, "frac_variance_explained": 0.9303119121545769, "cossim": 0.9563094456511808, "l2_ratio": 0.9556090986154165, "relative_reconstruction_bias": 0.9998748388635107, "loss_original": 2.591329812285412, "loss_reconstructed": 2.912389753812767, "loss_zero": 12.979128625019488, "frac_recovered": 0.9655166954161173, "frac_alive": 0.86328125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35b66366e463e23602a12aa239decfaf82325071984f6e48fbfc53f1e97e1aac
3
+ size 100799263
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 0,
8
+ "activation_dim": 768,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 8,
12
+ "lm_name": "EleutherAI/pythia-160m-deduped",
13
+ "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_30",
14
+ "submodule_name": "resid_post_layer_8",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 20
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 768,
22
+ "io": "out",
23
+ "n_ctxs": 244,
24
+ "ctx_len": 1024,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 7.542313641812428, "l1_loss": 59.5405657664839, "l0": 20.316411776715015, "frac_variance_explained": 0.9169782827417534, "cossim": 0.9468502043241478, "l2_ratio": 0.9470895029694201, "relative_reconstruction_bias": 1.0005664681813804, "loss_original": 2.591329812285412, "loss_reconstructed": 3.063094075903835, "loss_zero": 12.979128625019488, "frac_recovered": 0.9492282486823668, "frac_alive": 0.423095703125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52a73e6d5540b5666e46b3ce47c868f81846d1ac46cc3f2fd0ed174158fe95e3
3
+ size 100799263
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 0,
8
+ "activation_dim": 768,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 8,
12
+ "lm_name": "EleutherAI/pythia-160m-deduped",
13
+ "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_31",
14
+ "submodule_name": "resid_post_layer_8",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 40
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 768,
22
+ "io": "out",
23
+ "n_ctxs": 244,
24
+ "ctx_len": 1024,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 6.596800962126399, "l1_loss": 68.14624850721245, "l0": 40.465813809130566, "frac_variance_explained": 0.9358430052378092, "cossim": 0.9595368070056639, "l2_ratio": 0.9597559456365654, "relative_reconstruction_bias": 1.0001378511808006, "loss_original": 2.591329812285412, "loss_reconstructed": 2.866633120071457, "loss_zero": 12.979128625019488, "frac_recovered": 0.9703360113752894, "frac_alive": 0.53546142578125, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b355bdff67e88e1ccf0c30ff781cd2408f566c203a2ffea6d730c47faf7e6958
3
+ size 100799263
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 0,
8
+ "activation_dim": 768,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 8,
12
+ "lm_name": "EleutherAI/pythia-160m-deduped",
13
+ "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_32",
14
+ "submodule_name": "resid_post_layer_8",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 80
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 768,
22
+ "io": "out",
23
+ "n_ctxs": 244,
24
+ "ctx_len": 1024,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 5.868561931403287, "l1_loss": 87.49280005765249, "l0": 73.93405321419957, "frac_variance_explained": 0.9491253247462124, "cossim": 0.9681477079908531, "l2_ratio": 0.9674936364214104, "relative_reconstruction_bias": 0.9984568217432642, "loss_original": 2.591329812285412, "loss_reconstructed": 2.771394150802888, "loss_zero": 12.979128625019488, "frac_recovered": 0.9805464475269777, "frac_alive": 0.52166748046875, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b55991b9ed3a61bfbd12123f73db672397e9e3917e4eb1a14a0dc8e05792dc
3
+ size 100799263
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 0,
8
+ "activation_dim": 768,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 8,
12
+ "lm_name": "EleutherAI/pythia-160m-deduped",
13
+ "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_33",
14
+ "submodule_name": "resid_post_layer_8",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 160
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 768,
22
+ "io": "out",
23
+ "n_ctxs": 244,
24
+ "ctx_len": 1024,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.841383078012122, "l1_loss": 130.77086565867964, "l0": 155.94150111186934, "frac_variance_explained": 0.9658320416887123, "cossim": 0.9784838686506432, "l2_ratio": 0.978472747716559, "relative_reconstruction_bias": 1.0000444162322815, "loss_original": 2.591329812285412, "loss_reconstructed": 2.6872186675129166, "loss_zero": 12.979128625019488, "frac_recovered": 0.9896216367382601, "frac_alive": 0.4205322265625, "hyperparameters": {"n_inputs": 1000, "context_length": 1024}}
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:123ae303303f92498d1269b0e4c32df87c055d51b5bb87f6e7d5cbfbb23efca7
3
+ size 100799263
JumpRelu_pythia-160m-deduped__0108/resid_post_layer_8/trainer_4/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "JumpReluTrainer",
4
+ "dict_class": "JumpReluAutoEncoder",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "seed": 0,
8
+ "activation_dim": 768,
9
+ "dict_size": 16384,
10
+ "device": "cuda:0",
11
+ "layer": 8,
12
+ "lm_name": "EleutherAI/pythia-160m-deduped",
13
+ "wandb_name": "JumpReluTrainer-EleutherAI/pythia-160m-deduped-resid_post_layer_8_trainer_34",
14
+ "submodule_name": "resid_post_layer_8",
15
+ "bandwidth": 0.001,
16
+ "sparsity_penalty": 1.0,
17
+ "sparsity_warmup_steps": 5000,
18
+ "target_l0": 320
19
+ },
20
+ "buffer": {
21
+ "d_submodule": 768,
22
+ "io": "out",
23
+ "n_ctxs": 244,
24
+ "ctx_len": 1024,
25
+ "refresh_batch_size": 32,
26
+ "out_batch_size": 2048,
27
+ "device": "cuda:0"
28
+ }
29
+ }