MrGonao committed (verified)
Commit 8e89a86 · Parent: d6e3db8

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See the raw diff for the complete changeset.
Files changed (50):
  1. baseline-128x-k128/config.json +1 -0
  2. baseline-128x-k128/layers.10.mlp/cfg.json +1 -0
  3. baseline-128x-k128/layers.10.mlp/sae.safetensors +3 -0
  4. baseline-128x-k128/layers.15.mlp/cfg.json +1 -0
  5. baseline-128x-k128/layers.15.mlp/sae.safetensors +3 -0
  6. baseline-128x-k128/layers.20.mlp/cfg.json +1 -0
  7. baseline-128x-k128/layers.20.mlp/sae.safetensors +3 -0
  8. baseline-128x-k128/lr_scheduler_0.pt +3 -0
  9. baseline-128x-k128/optimizer_0.pt +3 -0
  10. baseline-128x-k128/optimizer_1.pt +3 -0
  11. baseline-128x-k128/rank_0_state.pt +3 -0
  12. baseline-128x-k128/state.pt +3 -0
  13. baseline-16x-k128/config.json +1 -0
  14. baseline-16x-k128/layers.10.mlp/cfg.json +1 -0
  15. baseline-16x-k128/layers.10.mlp/sae.safetensors +3 -0
  16. baseline-16x-k128/layers.15.mlp/cfg.json +1 -0
  17. baseline-16x-k128/layers.15.mlp/sae.safetensors +3 -0
  18. baseline-16x-k128/layers.20.mlp/cfg.json +1 -0
  19. baseline-16x-k128/layers.20.mlp/sae.safetensors +3 -0
  20. baseline-16x-k128/lr_scheduler_0.pt +3 -0
  21. baseline-16x-k128/optimizer_0.pt +3 -0
  22. baseline-16x-k128/optimizer_1.pt +3 -0
  23. baseline-16x-k128/rank_0_state.pt +3 -0
  24. baseline-16x-k128/state.pt +3 -0
  25. baseline-16x-k64/config.json +1 -0
  26. baseline-16x-k64/layers.10.mlp/cfg.json +1 -0
  27. baseline-16x-k64/layers.10.mlp/sae.safetensors +3 -0
  28. baseline-16x-k64/layers.15.mlp/cfg.json +1 -0
  29. baseline-16x-k64/layers.15.mlp/sae.safetensors +3 -0
  30. baseline-16x-k64/layers.20.mlp/cfg.json +1 -0
  31. baseline-16x-k64/layers.20.mlp/sae.safetensors +3 -0
  32. baseline-16x-k64/lr_scheduler_0.pt +3 -0
  33. baseline-16x-k64/optimizer_0.pt +3 -0
  34. baseline-16x-k64/optimizer_1.pt +3 -0
  35. baseline-16x-k64/rank_0_state.pt +3 -0
  36. baseline-16x-k64/state.pt +3 -0
  37. baseline-32x-k128/config.json +1 -0
  38. baseline-32x-k128/layers.10.mlp/cfg.json +1 -0
  39. baseline-32x-k128/layers.10.mlp/sae.safetensors +3 -0
  40. baseline-32x-k128/layers.15.mlp/cfg.json +1 -0
  41. baseline-32x-k128/layers.15.mlp/sae.safetensors +3 -0
  42. baseline-32x-k128/layers.20.mlp/cfg.json +1 -0
  43. baseline-32x-k128/layers.20.mlp/sae.safetensors +3 -0
  44. baseline-32x-k128/lr_scheduler_0.pt +3 -0
  45. baseline-32x-k128/optimizer_0.pt +3 -0
  46. baseline-32x-k128/optimizer_1.pt +3 -0
  47. baseline-32x-k128/rank_0_state.pt +3 -0
  48. baseline-32x-k128/state.pt +3 -0
  49. baseline-32x-k64/config.json +1 -0
  50. baseline-32x-k64/layers.10.mlp/cfg.json +1 -0
baseline-128x-k128/config.json ADDED
@@ -0,0 +1 @@
+ {"sae": {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 2946, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 16, "grad_acc_steps": 2, "micro_acc_steps": 1, "stop_steps": 10000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 8000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-128x-k128", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
baseline-128x-k128/layers.10.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 2946, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-128x-k128/layers.10.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:793aa1ee37e886b3210bd08102e19f72dfcdef6bb720805553eefc5387f6634d
+ size 341363360
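The weights themselves are Git LFS pointers: only the sha256 and byte size appear in the diff. The 341,363,360-byte size is consistent with an fp32 transcoder holding encoder and decoder weights and biases plus a 576x576 skip connection. A back-of-the-envelope check (the exact tensor layout is an assumption inferred from the transcode and skip_connection flags above):

```python
d_in = 576
f = 128 * d_in  # 73728 latents for the 128x run

params = (
    f * d_in       # encoder weight
    + f            # encoder bias
    + f * d_in     # decoder weight
    + d_in         # decoder bias
    + d_in * d_in  # skip connection (skip_connection: true)
)
print(4 * params)  # 341362944 bytes of fp32 -- 416 bytes under the
                   # 341363360 above, plausibly the safetensors header
```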
baseline-128x-k128/layers.15.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 2946, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-128x-k128/layers.15.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de9cebcc0c5a85e8a14aaaa1ae869a0a433cce9fdb90b0776896a8ddb10f0a34
+ size 341363360
baseline-128x-k128/layers.20.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 2946, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-128x-k128/layers.20.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26ba2e9ab845db5bd1d11871b3fbb4a40fd09b2fd1c3e77737a2e27dc504f768
+ size 341363360
baseline-128x-k128/lr_scheduler_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7c91a088ea6ea032e6b80395a912fc02957b1f89ffdab30523b0aa45c5dde5b
+ size 1020
baseline-128x-k128/optimizer_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9097295520720ab8af201776a2159e142529d4016acfad8b5cdc639ff127bda
+ size 1023200816
baseline-128x-k128/optimizer_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8891d988d9f5f59394457c76541beb13abda955df6a2fa8ca021dfb80b7b1f39
+ size 1789112
baseline-128x-k128/rank_0_state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66c649d647071cf460df941d9ff32dc81790402b26bf9b0b3a5cb5e35a656210
+ size 1771319
baseline-128x-k128/state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8dfd1ef6b555df5ad7efb5e9597fba35316c7be30e2e221ea46de8e76592a08b
+ size 856
baseline-16x-k128/config.json ADDED
@@ -0,0 +1 @@
+ {"sae": {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 1539, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "stop_steps": 5000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 4000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-16x-k128", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
baseline-16x-k128/layers.10.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k128/layers.10.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a3892de718ae73bb56b8f8e0d0b3617ba1af9a511fc6472a9838333931afa9e1
+ size 43834000
baseline-16x-k128/layers.15.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k128/layers.15.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fa69afcb6ab83e7b7ae8307c3d87a1e7cf18886cc95782942190e3c16ea3ace
+ size 43834000
baseline-16x-k128/layers.20.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k128/layers.20.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:30485800adb27bd9975f5bf64627ed19a295128972ee22e9afc4653cbdb97144
+ size 43834000
baseline-16x-k128/lr_scheduler_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ddd0d07701c3ba499d6e8cb644e6586e74a8ad17d38cab9cd0bf72cf61f04cb5
+ size 1020
baseline-16x-k128/optimizer_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8815d008f3e428f71d070f111248c65b4bb197c5ff7e6fb17e79b585bc133fe1
+ size 131386928
baseline-16x-k128/optimizer_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c149707b13ee860f9e7d176dd7ac1633fd1344e086961e4087a5b0f8c30c12a1
+ size 240824
baseline-16x-k128/rank_0_state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a549c9fbf3c27871751cf668af3d375a1b7767c3eb2b35581b3cd7376971a245
+ size 222967
baseline-16x-k128/state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:241a3af5b7905444d1b0c938b38c55fee6d3d2c2be09cd70c374dc7c6bbf7954
+ size 856
baseline-16x-k64/config.json ADDED
@@ -0,0 +1 @@
+ {"sae": {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 1491, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "stop_steps": 5000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 4000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-16x-k64", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 24}
baseline-16x-k64/layers.10.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k64/layers.10.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d48eae0e58a8f8b47fbbb49fa60271eaffce37b04f8b8009e454c7fabe015f6e
+ size 43834000
baseline-16x-k64/layers.15.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k64/layers.15.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2ab216c35d7dc8aaf1c82e024fe7b1c21eff2b12a7f4af59f927b7128a8ea3ac
+ size 43834000
baseline-16x-k64/layers.20.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-16x-k64/layers.20.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5413673ade586e276a7c044cb017e4a5d06f83a8bac22fe3d518d09f7b2676ce
+ size 43834000
baseline-16x-k64/lr_scheduler_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ddd0d07701c3ba499d6e8cb644e6586e74a8ad17d38cab9cd0bf72cf61f04cb5
+ size 1020
baseline-16x-k64/optimizer_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb057026e4b2721b14835d78031699e57541c6d20e01981078e40614908c4009
+ size 131386928
baseline-16x-k64/optimizer_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c95b5224282279c73a1b83679700cff2318c88cdc05aa3dedd2a65e654fe0f7d
+ size 240824
baseline-16x-k64/rank_0_state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a549c9fbf3c27871751cf668af3d375a1b7767c3eb2b35581b3cd7376971a245
+ size 222967
baseline-16x-k64/state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:241a3af5b7905444d1b0c938b38c55fee6d3d2c2be09cd70c374dc7c6bbf7954
+ size 856
baseline-32x-k128/config.json ADDED
@@ -0,0 +1 @@
+ {"sae": {"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 1539, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "stop_steps": 5000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 4000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-32x-k128", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
baseline-32x-k128/layers.10.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-32x-k128/layers.10.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8e5d2ae1d462e1bf6a9f8210d6566b996420c5ce585887917acea1f31753a2a4
+ size 86338200
baseline-32x-k128/layers.15.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-32x-k128/layers.15.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e06c237ce049e55e1c68f8c5b6589ca5e4cc415c1c1bcb2ed577a42a61ccc25c
+ size 86338200
baseline-32x-k128/layers.20.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}
baseline-32x-k128/layers.20.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:249fd0796c562caf9ca9014a1866c3aabcf90eb88ff2e282398808b2f1c605a2
+ size 86338200
baseline-32x-k128/lr_scheduler_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ddd0d07701c3ba499d6e8cb644e6586e74a8ad17d38cab9cd0bf72cf61f04cb5
+ size 1020
baseline-32x-k128/optimizer_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:30e926abd84c6ac57a220387b39065832038780cf2dd51877e99cb8cbca4dff0
+ size 258788912
baseline-32x-k128/optimizer_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fdd9bd84a6b36c879668d085406271490da21f631d85e0e3c42fa45755fc8e3b
+ size 462008
baseline-32x-k128/rank_0_state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e90ee6bac0c6e8e7f7d49a9ecee6ac28119c6154b76ae24b77492e193a36b88e
+ size 444151
baseline-32x-k128/state.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:241a3af5b7905444d1b0c938b38c55fee6d3d2c2be09cd70c374dc7c6bbf7954
+ size 856
baseline-32x-k64/config.json ADDED
@@ -0,0 +1 @@
+ {"sae": {"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 1491, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}}, "batch_size": 32, "grad_acc_steps": 1, "micro_acc_steps": 1, "stop_steps": 5000, "loss_fn": "fvu", "optimizer": "muon", "lr": 0.0008, "lr_warmup_steps": 1000, "k_decay_steps": 4000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.10.mlp", "layers.15.mlp", "layers.20.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "baseline-32x-k64", "wandb_log_frequency": 1, "wandb_project": "sparsify", "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 24}
baseline-32x-k64/layers.10.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+ {"activation": "topk", "expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "skip_connection": true, "transcode": true, "optimized_encoder_config": "None_", "pkm_config": {"pad": false, "softmax": false, "heads": 1, "bias": false, "init_scale": 1.0}, "kronecker_config": {"in_group": 2, "out_group": 4, "u": 4, "lora_dim": 1.0}, "d_in": 576}