Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- baseline-128x-k128/config.json +1 -0
- baseline-128x-k128/layers.10.mlp/cfg.json +1 -0
- baseline-128x-k128/layers.10.mlp/sae.safetensors +3 -0
- baseline-128x-k128/layers.15.mlp/cfg.json +1 -0
- baseline-128x-k128/layers.15.mlp/sae.safetensors +3 -0
- baseline-128x-k128/layers.20.mlp/cfg.json +1 -0
- baseline-128x-k128/layers.20.mlp/sae.safetensors +3 -0
- baseline-128x-k128/lr_scheduler_0.pt +3 -0
- baseline-128x-k128/optimizer_0.pt +3 -0
- baseline-128x-k128/optimizer_1.pt +3 -0
- baseline-128x-k128/rank_0_state.pt +3 -0
- baseline-128x-k128/state.pt +3 -0
- baseline-16x-k128/config.json +1 -0
- baseline-16x-k128/layers.10.mlp/cfg.json +1 -0
- baseline-16x-k128/layers.10.mlp/sae.safetensors +3 -0
- baseline-16x-k128/layers.15.mlp/cfg.json +1 -0
- baseline-16x-k128/layers.15.mlp/sae.safetensors +3 -0
- baseline-16x-k128/layers.20.mlp/cfg.json +1 -0
- baseline-16x-k128/layers.20.mlp/sae.safetensors +3 -0
- baseline-16x-k128/lr_scheduler_0.pt +3 -0
- baseline-16x-k128/optimizer_0.pt +3 -0
- baseline-16x-k128/optimizer_1.pt +3 -0
- baseline-16x-k128/rank_0_state.pt +3 -0
- baseline-16x-k128/state.pt +3 -0
- baseline-16x-k64/config.json +1 -0
- baseline-16x-k64/layers.10.mlp/cfg.json +1 -0
- baseline-16x-k64/layers.10.mlp/sae.safetensors +3 -0
- baseline-16x-k64/layers.15.mlp/cfg.json +1 -0
- baseline-16x-k64/layers.15.mlp/sae.safetensors +3 -0
- baseline-16x-k64/layers.20.mlp/cfg.json +1 -0
- baseline-16x-k64/layers.20.mlp/sae.safetensors +3 -0
- baseline-16x-k64/lr_scheduler_0.pt +3 -0
- baseline-16x-k64/optimizer_0.pt +3 -0
- baseline-16x-k64/optimizer_1.pt +3 -0
- baseline-16x-k64/rank_0_state.pt +3 -0
- baseline-16x-k64/state.pt +3 -0
- baseline-32x-k128/config.json +1 -0
- baseline-32x-k128/layers.10.mlp/cfg.json +1 -0
- baseline-32x-k128/layers.10.mlp/sae.safetensors +3 -0
- baseline-32x-k128/layers.15.mlp/cfg.json +1 -0
- baseline-32x-k128/layers.15.mlp/sae.safetensors +3 -0
- baseline-32x-k128/layers.20.mlp/cfg.json +1 -0
- baseline-32x-k128/layers.20.mlp/sae.safetensors +3 -0
- baseline-32x-k128/lr_scheduler_0.pt +3 -0
- baseline-32x-k128/optimizer_0.pt +3 -0
- baseline-32x-k128/optimizer_1.pt +3 -0
- baseline-32x-k128/rank_0_state.pt +3 -0
- baseline-32x-k128/state.pt +3 -0
- baseline-32x-k64/config.json +1 -0
- baseline-32x-k64/layers.10.mlp/cfg.json +1 -0
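The runs visible here (baseline-128x-k128, -16x-k128, -16x-k64, -32x-k128, -32x-k64) each hold per-hookpoint transcoder weights plus resumable trainer state. To pull a single run locally without cloning every checkpoint, huggingface_hub's snapshot_download can filter by path pattern; a minimal sketch, with a placeholder repo_id since the diff view does not name the repository:

from huggingface_hub import snapshot_download

# Fetch one run's directory only. repo_id is hypothetical -- substitute
# the repository this commit belongs to; allow_patterns scopes the download.
local_dir = snapshot_download(
    repo_id="<org>/<repo>",
    allow_patterns=["baseline-16x-k128/*"],
)
print(local_dir)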
baseline-128x-k128/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 2946, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 16, "grad_acc_steps": 2, "micro_acc_steps": 1, "stop_steps": 10000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 8000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-128x-k128", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
baseline-128x-k128/layers.10.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 2946, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
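The per-layer cfg fixes the geometry: with d_in = 576 and expansion_factor = 128, the latent dimension is 128 × 576 = 73,728. A back-of-the-envelope check, assuming fp32 weights and an encoder/decoder layout plus the skip matrix implied by skip_connection (tensor names here are illustrative, not read from the file):

import json

with open("baseline-128x-k128/layers.10.mlp/cfg.json") as f:
    cfg = json.load(f)

d_in = cfg["d_in"]                          # 576
n_latents = cfg["expansion_factor"] * d_in  # 128 * 576 = 73,728

# Encoder + decoder matrices, a d_in x d_in skip matrix, and two biases.
n_params = 2 * n_latents * d_in + d_in * d_in + n_latents + d_in
print(n_params * 4)  # 341,362,944 bytes at fp32

That lands within a few hundred bytes of the 341,363,360-byte pointer below; the small remainder is the safetensors header.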
baseline-128x-k128/layers.10.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:793aa1ee37e886b3210bd08102e19f72dfcdef6bb720805553eefc5387f6634d
+size 341363360
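The sae.safetensors entries in this diff are Git LFS pointers (spec version, blob oid, byte size), not the weights themselves. Once the real file is downloaded, the archive can be inspected without assuming any particular tensor naming; a minimal sketch using the safetensors package:

from safetensors.torch import load_file

# Enumerate whatever tensors the trainer saved; no key names are assumed.
state = load_file("baseline-128x-k128/layers.10.mlp/sae.safetensors")
for name, tensor in state.items():
    print(name, tuple(tensor.shape), tensor.dtype)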
baseline-128x-k128/layers.15.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 2946, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-128x-k128/layers.15.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de9cebcc0c5a85e8a14aaaa1ae869a0a433cce9fdb90b0776896a8ddb10f0a34
+size 341363360
baseline-128x-k128/layers.20.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 2946, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-128x-k128/layers.20.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26ba2e9ab845db5bd1d11871b3fbb4a40fd09b2fd1c3e77737a2e27dc504f768
+size 341363360
baseline-128x-k128/lr_scheduler_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7c91a088ea6ea032e6b80395a912fc02957b1f89ffdab30523b0aa45c5dde5b
+size 1020
baseline-128x-k128/optimizer_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9097295520720ab8af201776a2159e142529d4016acfad8b5cdc639ff127bda
+size 1023200816
baseline-128x-k128/optimizer_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8891d988d9f5f59394457c76541beb13abda955df6a2fa8ca021dfb80b7b1f39
+size 1789112
baseline-128x-k128/rank_0_state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66c649d647071cf460df941d9ff32dc81790402b26bf9b0b3a5cb5e35a656210
+size 1771319
baseline-128x-k128/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dfd1ef6b555df5ad7efb5e9597fba35316c7be30e2e221ea46de8e76592a08b
+size 856
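Besides the per-hookpoint weights, each run directory carries resumable trainer state as ordinary PyTorch pickles (optimizer state, LR scheduler, per-rank state). Their exact schema is not spelled out in this diff, so the sketch below only enumerates top-level keys; weights_only=False is needed on recent PyTorch because these are arbitrary pickles:

import torch

for fname in ["state.pt", "lr_scheduler_0.pt", "rank_0_state.pt"]:
    obj = torch.load(f"baseline-128x-k128/{fname}",
                     map_location="cpu", weights_only=False)
    keys = list(obj) if isinstance(obj, dict) else type(obj).__name__
    print(fname, keys)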
baseline-16x-k128/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 1539, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "stop_steps": 5000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 4000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-16x-k128", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
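Two details of this config are worth making explicit. First, the launch-time k of 1539 exceeds the 128 recorded in this run's per-layer cfg.json files, consistent with k_decay_steps annealing k toward its final value during training. Second, the token budget follows from the step and batch settings; a sketch, assuming stop_steps counts optimizer steps and each step consumes batch_size × grad_acc_steps sequences of ctx_len tokens (an assumption about the sparsify trainer, not stated here):

# Token budget implied by the config above (values copied from the JSON).
stop_steps, batch_size, grad_acc_steps, ctx_len = 5000, 32, 1, 2048
tokens = stop_steps * batch_size * grad_acc_steps * ctx_len
print(f"{tokens:,}")  # 327,680,000
# baseline-128x-k128 (10000 steps, batch 16, grad-acc 2) gives 655,360,000.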
baseline-16x-k128/layers.10.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k128/layers.10.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3892de718ae73bb56b8f8e0d0b3617ba1af9a511fc6472a9838333931afa9e1
+size 43834000
baseline-16x-k128/layers.15.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k128/layers.15.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa69afcb6ab83e7b7ae8307c3d87a1e7cf18886cc95782942190e3c16ea3ace
+size 43834000
baseline-16x-k128/layers.20.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k128/layers.20.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30485800adb27bd9975f5bf64627ed19a295128972ee22e9afc4653cbdb97144
+size 43834000
baseline-16x-k128/lr_scheduler_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd0d07701c3ba499d6e8cb644e6586e74a8ad17d38cab9cd0bf72cf61f04cb5
+size 1020
baseline-16x-k128/optimizer_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8815d008f3e428f71d070f111248c65b4bb197c5ff7e6fb17e79b585bc133fe1
+size 131386928
baseline-16x-k128/optimizer_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c149707b13ee860f9e7d176dd7ac1633fd1344e086961e4087a5b0f8c30c12a1
+size 240824
baseline-16x-k128/rank_0_state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a549c9fbf3c27871751cf668af3d375a1b7767c3eb2b35581b3cd7376971a245
+size 222967
baseline-16x-k128/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:241a3af5b7905444d1b0c938b38c55fee6d3d2c2be09cd70c374dc7c6bbf7954
+size 856
baseline-16x-k64/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 1491, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "stop_steps": 5000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 4000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-16x-k64", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 24}
baseline-16x-k64/layers.10.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k64/layers.10.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d48eae0e58a8f8b47fbbb49fa60271eaffce37b04f8b8009e454c7fabe015f6e
+size 43834000
baseline-16x-k64/layers.15.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k64/layers.15.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ab216c35d7dc8aaf1c82e024fe7b1c21eff2b12a7f4af59f927b7128a8ea3ac
+size 43834000
baseline-16x-k64/layers.20.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k64/layers.20.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5413673ade586e276a7c044cb017e4a5d06f83a8bac22fe3d518d09f7b2676ce
+size 43834000
baseline-16x-k64/lr_scheduler_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd0d07701c3ba499d6e8cb644e6586e74a8ad17d38cab9cd0bf72cf61f04cb5
+size 1020
baseline-16x-k64/optimizer_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb057026e4b2721b14835d78031699e57541c6d20e01981078e40614908c4009
+size 131386928
baseline-16x-k64/optimizer_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c95b5224282279c73a1b83679700cff2318c88cdc05aa3dedd2a65e654fe0f7d
+size 240824
baseline-16x-k64/rank_0_state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a549c9fbf3c27871751cf668af3d375a1b7767c3eb2b35581b3cd7376971a245
+size 222967
baseline-16x-k64/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:241a3af5b7905444d1b0c938b38c55fee6d3d2c2be09cd70c374dc7c6bbf7954
+size 856
baseline-32x-k128/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 1539, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "stop_steps": 5000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 4000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-32x-k128", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
baseline-32x-k128/layers.10.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-32x-k128/layers.10.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e5d2ae1d462e1bf6a9f8210d6566b996420c5ce585887917acea1f31753a2a4
+size 86338200
baseline-32x-k128/layers.15.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-32x-k128/layers.15.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e06c237ce049e55e1c68f8c5b6589ca5e4cc415c1c1bcb2ed577a42a61ccc25c
+size 86338200
baseline-32x-k128/layers.20.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-32x-k128/layers.20.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:249fd0796c562caf9ca9014a1866c3aabcf90eb88ff2e282398808b2f1c605a2
+size 86338200
baseline-32x-k128/lr_scheduler_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd0d07701c3ba499d6e8cb644e6586e74a8ad17d38cab9cd0bf72cf61f04cb5
+size 1020
baseline-32x-k128/optimizer_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30e926abd84c6ac57a220387b39065832038780cf2dd51877e99cb8cbca4dff0
+size 258788912
baseline-32x-k128/optimizer_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdd9bd84a6b36c879668d085406271490da21f631d85e0e3c42fa45755fc8e3b
+size 462008
baseline-32x-k128/rank_0_state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e90ee6bac0c6e8e7f7d49a9ecee6ac28119c6154b76ae24b77492e193a36b88e
+size 444151
baseline-32x-k128/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:241a3af5b7905444d1b0c938b38c55fee6d3d2c2be09cd70c374dc7c6bbf7954
+size 856
baseline-32x-k64/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 1491, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "stop_steps": 5000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 4000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-32x-k64", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 24}
baseline-32x-k64/layers.10.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}