{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k32-sae-mlp-768", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "data_preprocessing_num_proc": 48} |