{
    "cache_cfg": {
        "dataset_repo": "EleutherAI/fineweb-edu-dedup-10b",
        "dataset_split": "train",
        "dataset_name": "",
        "dataset_column": "text",
        "batch_size": 16,
        "cache_ctx_len": 256,
        "n_tokens": 1000000,
        "n_splits": 5
    },
    "constructor_cfg": {
        "faiss_embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        "faiss_embedding_cache_dir": ".embedding_cache",
        "faiss_embedding_cache_enabled": true,
        "example_ctx_len": 32,
        "min_examples": 200,
        "n_non_activating": 50,
        "center_examples": true,
        "non_activating_source": "random",
        "neighbours_type": "co-occurrence"
    },
    "sampler_cfg": {
        "n_examples_train": 40,
        "n_examples_test": 50,
        "n_quantiles": 10,
        "train_type": "quantiles",
        "test_type": "quantiles",
        "ratio_top": 0.2
    },
    "model": "meta-llama/Llama-3.2-1B",
    "sparse_model": "nev/Llama-3.2-1B-mntss-skip-transcoder",
    "hookpoints": [
        "layers.0.mlp",
        "layers.1.mlp",
        "layers.2.mlp",
        "layers.3.mlp",
        "layers.4.mlp",
        "layers.5.mlp",
        "layers.6.mlp",
        "layers.7.mlp",
        "layers.8.mlp",
        "layers.9.mlp",
        "layers.10.mlp",
        "layers.11.mlp",
        "layers.12.mlp",
        "layers.13.mlp",
        "layers.14.mlp",
        "layers.15.mlp"
    ],
    "explainer_model": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
    "explainer_model_max_len": 5120,
    "explainer_provider": "offline",
    "explainer": "default",
    "scorers": [
        "fuzz",
        "detection"
    ],
    "name": "transcoder-llama-131k-mntss-1m",
    "max_latents": null,
    "filter_bos": false,
    "log_probs": false,
    "load_in_8bit": false,
    "hf_token": null,
    "pipeline_num_proc": 48,
    "num_gpus": 1,
    "seed": 22,
    "verbose": true,
    "num_examples_per_scorer_prompt": 5,
    "overwrite": []
}
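
A quick way to inspect or validate this config before launching a run: the sketch below loads it with Python's standard json module and prints the key run parameters. The filename run_config.json is hypothetical, and the snippet assumes nothing about any particular library's config loader; it only reads the fields defined above.

import json

# Minimal sketch: load the run config and summarize it.
# "run_config.json" is a hypothetical filename for the JSON above.
with open("run_config.json") as f:
    cfg = json.load(f)

print(f"Base model:    {cfg['model']}")
print(f"Sparse model:  {cfg['sparse_model']}")
print(f"Hookpoints:    {len(cfg['hookpoints'])} MLP layers")
print(f"Explainer:     {cfg['explainer_model']} "
      f"(provider: {cfg['explainer_provider']})")
print(f"Token budget:  {cfg['cache_cfg']['n_tokens']:,} tokens "
      f"at context length {cfg['cache_cfg']['cache_ctx_len']}")
print(f"Scorers:       {', '.join(cfg['scorers'])}")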