{
  "activation_type": "swiglu",
  "alibi": false,
  "alibi_bias_max": 8.0,
  "architectures": [
    "AIGCodeXMoEForCausalLM"
  ],
  "attention_dropout": 0.0,
  "attention_layer_norm": false,
  "attention_layer_norm_with_affine": false,
  "batch_size": 4,
  "bias_for_layer_norm": false,
  "block_group_size": 1,
  "block_type": "sequential",
  "clip_qkv": null,
  "d_model": 4096,
  "deepnorm": false,
  "embedding_dropout": 0.0,
  "embedding_size": 65280,
  "encoder_decoder": false,
  "eos_token_id": 2,
  "eval_max_sequence_length": null,
  "exp_dim_ratio": 1,
  "flash_attention": false,
  "gate_level": "token",
  "gate_sample_ratio": 1,
  "gate_softmax_temperature": 8.0,
  "gshard": false,
  "include_bias": false,
  "init_cutoff_factor": null,
  "init_device": "meta",
  "init_fn": "normal",
  "init_std": 0.01,
  "intermediate_size": 16384,
  "latent_attention": false,
  "latent_attention_dim": 512,
  "layer_norm_eps": 1e-05,
  "layer_norm_type": "default",
  "layer_norm_with_affine": false,
  "layer_share": false,
  "layer_share_mlp_version": 1,
  "layer_std_check": false,
  "max_sequence_length": 4096,
  "mlp_hidden_size": null,
  "mlp_ratio": 4,
  "mobile_llm_repeat_num": 1,
  "model_type": "hf_aigcodexmoe",
  "moe_act_ckpt_ratio": 1,
  "moe_auxiliary_loss": false,
  "moe_auxiliary_loss_weight": 0.0,
  "moe_batch_prioritized_routing": false,
  "moe_eval_capacity_token_fraction": 0.25,
  "moe_expert_count": 4,
  "moe_expert_count_mluti_level": null,
  "moe_freq": 2,
  "moe_freq_pos": 0,
  "moe_gate_input_type": "concat",
  "moe_gate_loss_combine_method": "average",
  "moe_gate_loss_weight": 0.0,
  "moe_gate_no_grad": false,
  "moe_gating_use_fp32": true,
  "moe_logging": false,
  "moe_normalize_gate_prob_before_dropping": false,
  "moe_second_expert_policy": "sampling",
  "moe_share_expert_count": 0,
  "moe_top1_expert": true,
  "moe_topn_expert": 1,
  "moe_version": 1,
  "multi_query_attention": false,
  "n_heads": 32,
  "n_kv_heads": null,
  "n_layers": 22,
  "pad_token_id": 0,
  "ple_layer_num": 0,
  "ple_layernorm": false,
  "precision": "amp_bf16",
  "residual_dropout": 0.0,
  "rope": true,
  "rope_base": 30000,
  "rope_ext_ratio": 1,
  "rope_full_precision": true,
  "scale_logits": false,
  "sft_ans_mask": false,
  "share_layer_groups": 1,
  "share_moe_groups": 1,
  "torch_dtype": "float32",
  "transformers_version": "4.40.2",
  "use_cache": true,
  "use_mobile_llm": false,
  "use_moe": false,
  "use_ple": false,
  "use_xmoe": true,
  "vocab_size": 64000,
  "weight_tying": false
}