{ "balance_loss_coef": 0.01, "bias": false, "block_size": 1023, "dropout": 0.0, "moe_layers": [ 0, 1, 2, 3, 4, 5, 6, 7 ], "n_embd": 512, "n_head": 8, "n_layer": 8, "num_experts": 8, "num_experts_per_tok": 2, "use_moe": true, "vocab_size": 32 }