Llama3.2-Mamba2-3B-dpo / mamba_config.json
Junxiong Wang
add models
d2ccbab
raw
history blame contribute delete
470 Bytes
{
    "d_model": 3072,
    "ssm_cfg": {
        "expand": 1,
        "ngroups": 24,
        "d_state": 128
    },
    "rms_norm_eps": 1e-05,
    "vocab_size": null,
    "d_inner": 3072,
    "d_xb": 1024,
    "intermediate_size": 8192,
    "hidden_act": "silu",
    "n_layer": 28,
    "attn_layers": [
        1,
        3,
        5,
        7,
        9,
        11,
        13,
        15,
        17,
        19,
        21,
        23,
        25,
        27
    ]
}