{ "mlp_bias": false, "attn_bias": false, "rotary_base": 10000, "rotary_scaling": { "beta_fast": 32, "beta_slow": 1, "factor": 40, "mscale": 1.0, "mscale_all_dim": 1.0, "original_max_position_embeddings": 4096, "type": "yarn" }, "residual_mlp": false, "disable_weight_only_quant_plugin": false, "moe": { "num_experts": 256, "shared_expert_intermediate_size": 2048, "top_k": 8, "normalization_mode": 3, "sparse_mixer_epsilon": 0.01, "tp_mode": 0, "topk_method": 2, "device_limited_n_group": 8, "device_limited_topk_group": 4, "device_limited_routed_scaling_factor": 2.5 }, "remove_duplicated_kv_heads": false, "fc_after_embed": false, "use_input_layernorm_in_first_layer": true, "use_last_layernorm": true, "layer_idx_offset": 0, "architecture": "DeepseekV2ForCausalLM", "dtype": "bfloat16", "vocab_size": 129280, "hidden_size": 7168, "num_hidden_layers": 61, "num_attention_heads": 128, "hidden_act": "swiglu", "logits_dtype": "float32", "norm_epsilon": 1e-06, "runtime_defaults": null, "position_embedding_type": "rope_gpt_neox", "num_key_value_heads": 128, "intermediate_size": 18432, "max_position_embeddings": 163840, "mapping": { "world_size": 4, "gpus_per_node": 8, "cp_size": 1, "tp_size": 4, "pp_size": 1, "moe_tp_size": 4, "moe_ep_size": 1 }, "quantization": { "quant_algo": "W4A16", "kv_cache_quant_algo": null, "group_size": 128, "smoothquant_val": 0.5, "clamp_val": null, "use_meta_recipe": false, "has_zero_point": false, "pre_quant_scale": false, "exclude_modules": null }, "use_parallel_embedding": false, "embedding_sharding_dim": 0, "head_size": 56, "qk_layernorm": false, "rotary_embedding_dim": 56, "moe_inter_size": 2048, "kv_lora_rank": 512, "q_lora_rank": 1536, "qk_nope_head_dim": 128, "qk_rope_head_dim": 64, "v_head_dim": 128, "topk_method": "noaux_tc", "first_k_dense_replace": 3, "moe_layer_freq": 1, "coring_func": "sigmoid", "fp8_format": false }