{
  "balance_loss_coef": 0.01,
  "bias": false,
  "block_size": 1023,
  "dropout": 0.0,
  "moe_layers": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7
  ],
  "n_embd": 512,
  "n_head": 8,
  "n_layer": 8,
  "num_experts": 8,
  "num_experts_per_tok": 2,
  "use_moe": true,
  "vocab_size": 32
}