Upload folder using huggingface_hub
Browse files- .DS_Store +0 -0
- config.json +1 -0
- k2-c0/cfg.json +1 -0
- k2-c0/sae.safetensors +3 -0
- k2-c1/cfg.json +1 -0
- k2-c1/sae.safetensors +3 -0
- k3-c0/cfg.json +1 -0
- k3-c0/sae.safetensors +3 -0
- k3-c1/cfg.json +1 -0
- k3-c1/sae.safetensors +3 -0
- k4-c2/cfg.json +1 -0
- k4-c2/sae.safetensors +3 -0
- k4-c3/cfg.json +1 -0
- k4-c3/sae.safetensors +3 -0
- k5-c1/cfg.json +1 -0
- k5-c1/sae.safetensors +3 -0
- k5-c2/cfg.json +1 -0
- k5-c2/sae.safetensors +3 -0
- l1_scheduler.pt +3 -0
- lr_scheduler.pt +3 -0
- scaling_factors.pt +3 -0
- state.pt +3 -0
.DS_Store
ADDED
Binary file (8.2 kB). View file
|
|
config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true}, "batch_size": 4, "max_seq_len": 1024, "num_training_tokens": 1000000000, "cycle_iterator": true, "grad_acc_steps": 1, "micro_acc_steps": 1, "adam_8bit": false, "adam_epsilon": 1e-08, "adam_betas": [0.0, 0.999], "lr": 0.0007, "lr_init": 7e-05, "lr_end": 7e-05, "lr_scheduler_name": "constant", "lr_warmup_steps": 0.01, "lr_decay_steps": 0.2, "l1_coefficient": 0.0003, "l1_warmup_steps": 0.05, "use_l2_loss": true, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0", "layers.1", "layers.2", "layers.3", "layers.4", "layers.5", "layers.6", "layers.7", "layers.8", "layers.9", "layers.10"], "layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "layer_stride": 1, "distribute_modules": false, "save_every": 50000, "normalize_activations": 1, "num_norm_estimation_tokens": 2000000, "clusters": {"k2-c0": [0, 1, 2, 3, 4, 5, 6], "k2-c1": [7, 8, 9, 10], "k3-c0": [0, 1, 2], "k3-c1": [3, 4, 5, 6], "k4-c2": [7, 8], "k4-c3": [9, 10], "k5-c1": [3, 4], "k5-c2": [5, 6]}, "cluster_hookpoints": {"k2-c0": ["layers.0", "layers.1", "layers.2", "layers.3", "layers.4", "layers.5", "layers.6"], "k2-c1": ["layers.7", "layers.8", "layers.9", "layers.10"], "k3-c0": ["layers.0", "layers.1", "layers.2"], "k3-c1": ["layers.3", "layers.4", "layers.5", "layers.6"], "k4-c2": ["layers.7", "layers.8"], "k4-c3": ["layers.9", "layers.10"], "k5-c1": ["layers.3", "layers.4"], "k5-c2": ["layers.5", "layers.6"]}, "hook": null, "log_to_wandb": true, "run_name": "checkpoints-clusters/pythia-160m-deduped-1024-lambda-0.0003-target-L0-None-lr-0.0007", "wandb_log_frequency": 1}
|
k2-c0/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true, "d_in": 768}
|
k2-c0/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00dae6a438f193e59c54e9e5f85a9af2b40b956c3f759a7536fd1bad5618f99d
|
3 |
+
size 75599264
|
k2-c1/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true, "d_in": 768}
|
k2-c1/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2eedc6fb983f3ed807d9ec2ffe07942f521b35341cb6a6f5176c659162c9877
|
3 |
+
size 75599264
|
k3-c0/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true, "d_in": 768}
|
k3-c0/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e338daf48045e7a90a4bd430a6e0edb3d89b728f06b6a9db8a125ba49d3e367
|
3 |
+
size 75599264
|
k3-c1/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true, "d_in": 768}
|
k3-c1/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:212d76bba4e01cfaca0bb9bc09b3c424821266359fb5776a579e27a9c4623665
|
3 |
+
size 75599264
|
k4-c2/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true, "d_in": 768}
|
k4-c2/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:217d2a971e6e9ca9ff05a1219c9c271a6c8ec7a8253c520721f6b0b65161e063
|
3 |
+
size 75599264
|
k4-c3/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true, "d_in": 768}
|
k4-c3/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65d96882ca1f390e0cf7af32dca24fe28aa76f8a94381b5828e8635efe7c2685
|
3 |
+
size 75599264
|
k5-c1/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true, "d_in": 768}
|
k5-c1/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8610f2a61b1973a418fa80f212bd42211e69b576c12ace621c8f59f6d68cc541
|
3 |
+
size 75599264
|
k5-c2/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "init_enc_as_dec_transpose": true, "d_in": 768}
|
k5-c2/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c236d425f7616e0fc66d9e48cf60e8f19aafdbc34847613d7d852ce7c295b4e
|
3 |
+
size 75599264
|
l1_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e9c3a305a515dd145375b00e49354ea6fd6775d0473c8228467fd9c0e0cd898
|
3 |
+
size 1012
|
lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da907babbb4e47553bf365712f119c369310fcc924144c8163d2917b1c96d245
|
3 |
+
size 1716
|
scaling_factors.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e3db8777a9549d8a63009b1cccd97ca981a4c8e9142d95c788587e11810a01c
|
3 |
+
size 1152
|
state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c2251a67d521d735ac9ba0c5ee0f025bf0eda0d7c8501b93ea53647a4845f35
|
3 |
+
size 788968
|