{
  "balance_loss_coef": 0.0,
  "bias": false,
  "block_size": 1023,
  "dropout": 0.0,
  "n_embd": 512,
  "n_head": 8,
  "n_layer": 8,
  "num_experts": 8,
  "num_experts_per_tok": 2,
  "use_moe": true,
  "vocab_size": 32
}
{
  "balance_loss_coef": 0.0,
  "bias": false,
  "block_size": 1023,
  "dropout": 0.0,
  "n_embd": 512,
  "n_head": 8,
  "n_layer": 8,
  "num_experts": 8,
  "num_experts_per_tok": 2,
  "use_moe": true,
  "vocab_size": 32
}