{ "mlp_bias": false, "attn_bias": false, "rotary_base": 500000.0, "rotary_scaling": { "factor": 8.0, "low_freq_factor": 1.0, "high_freq_factor": 4.0, "original_max_position_embeddings": 8192, "rope_type": "llama3" }, "residual_mlp": false, "disable_weight_only_quant_plugin": false, "moe": { "num_experts": 0, "top_k": 0, "normalization_mode": null, "tp_mode": 0 }, "architecture": "LlamaForCausalLM", "dtype": "bfloat16", "vocab_size": 128256, "hidden_size": 8192, "num_hidden_layers": 80, "num_attention_heads": 64, "hidden_act": "silu", "logits_dtype": "float32", "norm_epsilon": 1e-05, "position_embedding_type": "rope_gpt_neox", "max_position_embeddings": 131072, "num_key_value_heads": 8, "intermediate_size": 28672, "mapping": { "world_size": 4, "gpus_per_node": 8, "tp_size": 4, "pp_size": 1, "moe_tp_size": 4, "moe_ep_size": 1 }, "quantization": { "quant_algo": null, "kv_cache_quant_algo": null, "group_size": 128, "smoothquant_val": null, "clamp_val": null, "has_zero_point": false, "pre_quant_scale": false, "exclude_modules": null }, "use_parallel_embedding": false, "embedding_sharding_dim": 0, "share_embedding_table": false, "head_size": 128, "qk_layernorm": false }