{ "metadata": { "total_size": 632989704192 }, "weight_map": { "model.embed_tokens.weight": "pytorch_model-00000.bin", "lm_head.weight": "pytorch_model-00000.bin", "model.norm.scale": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.0.linear.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.1.linear.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.2.linear.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.3.linear.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.4.linear.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.5.linear.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.6.linear.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.7.linear.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.0.linear_1.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.1.linear_1.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.2.linear_1.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.3.linear_1.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.4.linear_1.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.5.linear_1.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.6.linear_1.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.7.linear_1.weight": "pytorch_model-00000.bin", "model.layers.0.moe_block.experts.0.linear_v.weight": "pytorch_model-00001.bin", "model.layers.0.moe_block.experts.1.linear_v.weight": "pytorch_model-00001.bin", "model.layers.0.moe_block.experts.2.linear_v.weight": "pytorch_model-00001.bin", "model.layers.0.moe_block.experts.3.linear_v.weight": "pytorch_model-00001.bin", "model.layers.0.moe_block.experts.4.linear_v.weight": "pytorch_model-00001.bin", "model.layers.0.moe_block.experts.5.linear_v.weight": "pytorch_model-00001.bin", "model.layers.0.moe_block.experts.6.linear_v.weight": "pytorch_model-00001.bin", "model.layers.0.moe_block.experts.7.linear_v.weight": "pytorch_model-00001.bin", "model.layers.0.attn.k_proj.weight": "pytorch_model-00001.bin", "model.layers.0.attn.o_proj.weight": "pytorch_model-00001.bin", "model.layers.0.attn.q_proj.weight": "pytorch_model-00001.bin", "model.layers.0.attn.v_proj.weight": "pytorch_model-00001.bin", "model.layers.0.pre_attn_norm.scale": "pytorch_model-00001.bin", "model.layers.0.post_attn_norm.scale": "pytorch_model-00001.bin", "model.layers.0.pre_moe_norm.scale": "pytorch_model-00001.bin", "model.layers.0.post_moe_norm.scale": "pytorch_model-00001.bin", "model.layers.0.moe_block.gate.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.0.linear.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.1.linear.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.2.linear.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.3.linear.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.4.linear.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.5.linear.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.6.linear.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.7.linear.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.0.linear_1.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.1.linear_1.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.2.linear_1.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.3.linear_1.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.4.linear_1.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.5.linear_1.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.6.linear_1.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.7.linear_1.weight": "pytorch_model-00001.bin", "model.layers.1.moe_block.experts.0.linear_v.weight": "pytorch_model-00002.bin", "model.layers.1.moe_block.experts.1.linear_v.weight": "pytorch_model-00002.bin", "model.layers.1.moe_block.experts.2.linear_v.weight": "pytorch_model-00002.bin", "model.layers.1.moe_block.experts.3.linear_v.weight": "pytorch_model-00002.bin", "model.layers.1.moe_block.experts.4.linear_v.weight": "pytorch_model-00002.bin", "model.layers.1.moe_block.experts.5.linear_v.weight": "pytorch_model-00002.bin", "model.layers.1.moe_block.experts.6.linear_v.weight": "pytorch_model-00002.bin", "model.layers.1.moe_block.experts.7.linear_v.weight": "pytorch_model-00002.bin", "model.layers.1.attn.k_proj.weight": "pytorch_model-00002.bin", "model.layers.1.attn.o_proj.weight": "pytorch_model-00002.bin", "model.layers.1.attn.q_proj.weight": "pytorch_model-00002.bin", "model.layers.1.attn.v_proj.weight": "pytorch_model-00002.bin", "model.layers.1.pre_attn_norm.scale": "pytorch_model-00002.bin", "model.layers.1.post_attn_norm.scale": "pytorch_model-00002.bin", "model.layers.1.pre_moe_norm.scale": "pytorch_model-00002.bin", "model.layers.1.post_moe_norm.scale": "pytorch_model-00002.bin", "model.layers.1.moe_block.gate.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.0.linear.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.1.linear.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.2.linear.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.3.linear.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.4.linear.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.5.linear.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.6.linear.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.7.linear.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.0.linear_1.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.1.linear_1.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.2.linear_1.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.3.linear_1.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.4.linear_1.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.5.linear_1.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.6.linear_1.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.7.linear_1.weight": "pytorch_model-00002.bin", "model.layers.10.moe_block.experts.0.linear_v.weight": "pytorch_model-00003.bin", "model.layers.10.moe_block.experts.1.linear_v.weight": "pytorch_model-00003.bin", "model.layers.10.moe_block.experts.2.linear_v.weight": "pytorch_model-00003.bin", "model.layers.10.moe_block.experts.3.linear_v.weight": "pytorch_model-00003.bin", "model.layers.10.moe_block.experts.4.linear_v.weight": "pytorch_model-00003.bin", "model.layers.10.moe_block.experts.5.linear_v.weight": "pytorch_model-00003.bin", "model.layers.10.moe_block.experts.6.linear_v.weight": "pytorch_model-00003.bin", "model.layers.10.moe_block.experts.7.linear_v.weight": "pytorch_model-00003.bin", "model.layers.10.attn.k_proj.weight": "pytorch_model-00003.bin", "model.layers.10.attn.o_proj.weight": "pytorch_model-00003.bin", "model.layers.10.attn.q_proj.weight": "pytorch_model-00003.bin", "model.layers.10.attn.v_proj.weight": "pytorch_model-00003.bin", "model.layers.10.pre_attn_norm.scale": "pytorch_model-00003.bin", "model.layers.10.post_attn_norm.scale": "pytorch_model-00003.bin", "model.layers.10.pre_moe_norm.scale": "pytorch_model-00003.bin", "model.layers.10.post_moe_norm.scale": "pytorch_model-00003.bin", "model.layers.10.moe_block.gate.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.0.linear.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.1.linear.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.2.linear.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.3.linear.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.4.linear.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.5.linear.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.6.linear.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.7.linear.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.0.linear_1.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.1.linear_1.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.2.linear_1.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.3.linear_1.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.4.linear_1.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.5.linear_1.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.6.linear_1.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.7.linear_1.weight": "pytorch_model-00003.bin", "model.layers.11.moe_block.experts.0.linear_v.weight": "pytorch_model-00004.bin", "model.layers.11.moe_block.experts.1.linear_v.weight": "pytorch_model-00004.bin", "model.layers.11.moe_block.experts.2.linear_v.weight": "pytorch_model-00004.bin", "model.layers.11.moe_block.experts.3.linear_v.weight": "pytorch_model-00004.bin", "model.layers.11.moe_block.experts.4.linear_v.weight": "pytorch_model-00004.bin", "model.layers.11.moe_block.experts.5.linear_v.weight": "pytorch_model-00004.bin", "model.layers.11.moe_block.experts.6.linear_v.weight": "pytorch_model-00004.bin", "model.layers.11.moe_block.experts.7.linear_v.weight": "pytorch_model-00004.bin", "model.layers.11.attn.k_proj.weight": "pytorch_model-00004.bin", "model.layers.11.attn.o_proj.weight": "pytorch_model-00004.bin", "model.layers.11.attn.q_proj.weight": "pytorch_model-00004.bin", "model.layers.11.attn.v_proj.weight": "pytorch_model-00004.bin", "model.layers.11.pre_attn_norm.scale": "pytorch_model-00004.bin", "model.layers.11.post_attn_norm.scale": "pytorch_model-00004.bin", "model.layers.11.pre_moe_norm.scale": "pytorch_model-00004.bin", "model.layers.11.post_moe_norm.scale": "pytorch_model-00004.bin", "model.layers.11.moe_block.gate.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.0.linear.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.1.linear.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.2.linear.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.3.linear.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.4.linear.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.5.linear.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.6.linear.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.7.linear.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.0.linear_1.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.1.linear_1.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.2.linear_1.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.3.linear_1.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.4.linear_1.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.5.linear_1.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.6.linear_1.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.7.linear_1.weight": "pytorch_model-00004.bin", "model.layers.12.moe_block.experts.0.linear_v.weight": "pytorch_model-00005.bin", "model.layers.12.moe_block.experts.1.linear_v.weight": "pytorch_model-00005.bin", "model.layers.12.moe_block.experts.2.linear_v.weight": "pytorch_model-00005.bin", "model.layers.12.moe_block.experts.3.linear_v.weight": "pytorch_model-00005.bin", "model.layers.12.moe_block.experts.4.linear_v.weight": "pytorch_model-00005.bin", "model.layers.12.moe_block.experts.5.linear_v.weight": "pytorch_model-00005.bin", "model.layers.12.moe_block.experts.6.linear_v.weight": "pytorch_model-00005.bin", "model.layers.12.moe_block.experts.7.linear_v.weight": "pytorch_model-00005.bin", "model.layers.12.attn.k_proj.weight": "pytorch_model-00005.bin", "model.layers.12.attn.o_proj.weight": "pytorch_model-00005.bin", "model.layers.12.attn.q_proj.weight": "pytorch_model-00005.bin", "model.layers.12.attn.v_proj.weight": "pytorch_model-00005.bin", "model.layers.12.pre_attn_norm.scale": "pytorch_model-00005.bin", "model.layers.12.post_attn_norm.scale": "pytorch_model-00005.bin", "model.layers.12.pre_moe_norm.scale": "pytorch_model-00005.bin", "model.layers.12.post_moe_norm.scale": "pytorch_model-00005.bin", "model.layers.12.moe_block.gate.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.0.linear.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.1.linear.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.2.linear.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.3.linear.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.4.linear.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.5.linear.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.6.linear.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.7.linear.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.0.linear_1.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.1.linear_1.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.2.linear_1.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.3.linear_1.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.4.linear_1.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.5.linear_1.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.6.linear_1.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.7.linear_1.weight": "pytorch_model-00005.bin", "model.layers.13.moe_block.experts.0.linear_v.weight": "pytorch_model-00006.bin", "model.layers.13.moe_block.experts.1.linear_v.weight": "pytorch_model-00006.bin", "model.layers.13.moe_block.experts.2.linear_v.weight": "pytorch_model-00006.bin", "model.layers.13.moe_block.experts.3.linear_v.weight": "pytorch_model-00006.bin", "model.layers.13.moe_block.experts.4.linear_v.weight": "pytorch_model-00006.bin", "model.layers.13.moe_block.experts.5.linear_v.weight": "pytorch_model-00006.bin", "model.layers.13.moe_block.experts.6.linear_v.weight": "pytorch_model-00006.bin", "model.layers.13.moe_block.experts.7.linear_v.weight": "pytorch_model-00006.bin", "model.layers.13.attn.k_proj.weight": "pytorch_model-00006.bin", "model.layers.13.attn.o_proj.weight": "pytorch_model-00006.bin", "model.layers.13.attn.q_proj.weight": "pytorch_model-00006.bin", "model.layers.13.attn.v_proj.weight": "pytorch_model-00006.bin", "model.layers.13.pre_attn_norm.scale": "pytorch_model-00006.bin", "model.layers.13.post_attn_norm.scale": "pytorch_model-00006.bin", "model.layers.13.pre_moe_norm.scale": "pytorch_model-00006.bin", "model.layers.13.post_moe_norm.scale": "pytorch_model-00006.bin", "model.layers.13.moe_block.gate.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.0.linear.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.1.linear.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.2.linear.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.3.linear.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.4.linear.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.5.linear.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.6.linear.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.7.linear.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.0.linear_1.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.1.linear_1.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.2.linear_1.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.3.linear_1.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.4.linear_1.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.5.linear_1.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.6.linear_1.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.7.linear_1.weight": "pytorch_model-00006.bin", "model.layers.14.moe_block.experts.0.linear_v.weight": "pytorch_model-00007.bin", "model.layers.14.moe_block.experts.1.linear_v.weight": "pytorch_model-00007.bin", "model.layers.14.moe_block.experts.2.linear_v.weight": "pytorch_model-00007.bin", "model.layers.14.moe_block.experts.3.linear_v.weight": "pytorch_model-00007.bin", "model.layers.14.moe_block.experts.4.linear_v.weight": "pytorch_model-00007.bin", "model.layers.14.moe_block.experts.5.linear_v.weight": "pytorch_model-00007.bin", "model.layers.14.moe_block.experts.6.linear_v.weight": "pytorch_model-00007.bin", "model.layers.14.moe_block.experts.7.linear_v.weight": "pytorch_model-00007.bin", "model.layers.14.attn.k_proj.weight": "pytorch_model-00007.bin", "model.layers.14.attn.o_proj.weight": "pytorch_model-00007.bin", "model.layers.14.attn.q_proj.weight": "pytorch_model-00007.bin", "model.layers.14.attn.v_proj.weight": "pytorch_model-00007.bin", "model.layers.14.pre_attn_norm.scale": "pytorch_model-00007.bin", "model.layers.14.post_attn_norm.scale": "pytorch_model-00007.bin", "model.layers.14.pre_moe_norm.scale": "pytorch_model-00007.bin", "model.layers.14.post_moe_norm.scale": "pytorch_model-00007.bin", "model.layers.14.moe_block.gate.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.0.linear.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.1.linear.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.2.linear.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.3.linear.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.4.linear.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.5.linear.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.6.linear.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.7.linear.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.0.linear_1.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.1.linear_1.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.2.linear_1.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.3.linear_1.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.4.linear_1.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.5.linear_1.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.6.linear_1.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.7.linear_1.weight": "pytorch_model-00007.bin", "model.layers.15.moe_block.experts.0.linear_v.weight": "pytorch_model-00008.bin", "model.layers.15.moe_block.experts.1.linear_v.weight": "pytorch_model-00008.bin", "model.layers.15.moe_block.experts.2.linear_v.weight": "pytorch_model-00008.bin", "model.layers.15.moe_block.experts.3.linear_v.weight": "pytorch_model-00008.bin", "model.layers.15.moe_block.experts.4.linear_v.weight": "pytorch_model-00008.bin", "model.layers.15.moe_block.experts.5.linear_v.weight": "pytorch_model-00008.bin", "model.layers.15.moe_block.experts.6.linear_v.weight": "pytorch_model-00008.bin", "model.layers.15.moe_block.experts.7.linear_v.weight": "pytorch_model-00008.bin", "model.layers.15.attn.k_proj.weight": "pytorch_model-00008.bin", "model.layers.15.attn.o_proj.weight": "pytorch_model-00008.bin", "model.layers.15.attn.q_proj.weight": "pytorch_model-00008.bin", "model.layers.15.attn.v_proj.weight": "pytorch_model-00008.bin", "model.layers.15.pre_attn_norm.scale": "pytorch_model-00008.bin", "model.layers.15.post_attn_norm.scale": "pytorch_model-00008.bin", "model.layers.15.pre_moe_norm.scale": "pytorch_model-00008.bin", "model.layers.15.post_moe_norm.scale": "pytorch_model-00008.bin", "model.layers.15.moe_block.gate.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.0.linear.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.1.linear.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.2.linear.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.3.linear.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.4.linear.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.5.linear.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.6.linear.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.7.linear.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.0.linear_1.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.1.linear_1.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.2.linear_1.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.3.linear_1.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.4.linear_1.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.5.linear_1.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.6.linear_1.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.7.linear_1.weight": "pytorch_model-00008.bin", "model.layers.16.moe_block.experts.0.linear_v.weight": "pytorch_model-00009.bin", "model.layers.16.moe_block.experts.1.linear_v.weight": "pytorch_model-00009.bin", "model.layers.16.moe_block.experts.2.linear_v.weight": "pytorch_model-00009.bin", "model.layers.16.moe_block.experts.3.linear_v.weight": "pytorch_model-00009.bin", "model.layers.16.moe_block.experts.4.linear_v.weight": "pytorch_model-00009.bin", "model.layers.16.moe_block.experts.5.linear_v.weight": "pytorch_model-00009.bin", "model.layers.16.moe_block.experts.6.linear_v.weight": "pytorch_model-00009.bin", "model.layers.16.moe_block.experts.7.linear_v.weight": "pytorch_model-00009.bin", "model.layers.16.attn.k_proj.weight": "pytorch_model-00009.bin", "model.layers.16.attn.o_proj.weight": "pytorch_model-00009.bin", "model.layers.16.attn.q_proj.weight": "pytorch_model-00009.bin", "model.layers.16.attn.v_proj.weight": "pytorch_model-00009.bin", "model.layers.16.pre_attn_norm.scale": "pytorch_model-00009.bin", "model.layers.16.post_attn_norm.scale": "pytorch_model-00009.bin", "model.layers.16.pre_moe_norm.scale": "pytorch_model-00009.bin", "model.layers.16.post_moe_norm.scale": "pytorch_model-00009.bin", "model.layers.16.moe_block.gate.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.0.linear.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.1.linear.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.2.linear.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.3.linear.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.4.linear.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.5.linear.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.6.linear.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.7.linear.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.0.linear_1.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.1.linear_1.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.2.linear_1.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.3.linear_1.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.4.linear_1.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.5.linear_1.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.6.linear_1.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.7.linear_1.weight": "pytorch_model-00009.bin", "model.layers.17.moe_block.experts.0.linear_v.weight": "pytorch_model-00010.bin", "model.layers.17.moe_block.experts.1.linear_v.weight": "pytorch_model-00010.bin", "model.layers.17.moe_block.experts.2.linear_v.weight": "pytorch_model-00010.bin", "model.layers.17.moe_block.experts.3.linear_v.weight": "pytorch_model-00010.bin", "model.layers.17.moe_block.experts.4.linear_v.weight": "pytorch_model-00010.bin", "model.layers.17.moe_block.experts.5.linear_v.weight": "pytorch_model-00010.bin", "model.layers.17.moe_block.experts.6.linear_v.weight": "pytorch_model-00010.bin", "model.layers.17.moe_block.experts.7.linear_v.weight": "pytorch_model-00010.bin", "model.layers.17.attn.k_proj.weight": "pytorch_model-00010.bin", "model.layers.17.attn.o_proj.weight": "pytorch_model-00010.bin", "model.layers.17.attn.q_proj.weight": "pytorch_model-00010.bin", "model.layers.17.attn.v_proj.weight": "pytorch_model-00010.bin", "model.layers.17.pre_attn_norm.scale": "pytorch_model-00010.bin", "model.layers.17.post_attn_norm.scale": "pytorch_model-00010.bin", "model.layers.17.pre_moe_norm.scale": "pytorch_model-00010.bin", "model.layers.17.post_moe_norm.scale": "pytorch_model-00010.bin", "model.layers.17.moe_block.gate.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.0.linear.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.1.linear.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.2.linear.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.3.linear.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.4.linear.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.5.linear.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.6.linear.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.7.linear.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.0.linear_1.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.1.linear_1.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.2.linear_1.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.3.linear_1.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.4.linear_1.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.5.linear_1.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.6.linear_1.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.7.linear_1.weight": "pytorch_model-00010.bin", "model.layers.18.moe_block.experts.0.linear_v.weight": "pytorch_model-00011.bin", "model.layers.18.moe_block.experts.1.linear_v.weight": "pytorch_model-00011.bin", "model.layers.18.moe_block.experts.2.linear_v.weight": "pytorch_model-00011.bin", "model.layers.18.moe_block.experts.3.linear_v.weight": "pytorch_model-00011.bin", "model.layers.18.moe_block.experts.4.linear_v.weight": "pytorch_model-00011.bin", "model.layers.18.moe_block.experts.5.linear_v.weight": "pytorch_model-00011.bin", "model.layers.18.moe_block.experts.6.linear_v.weight": "pytorch_model-00011.bin", "model.layers.18.moe_block.experts.7.linear_v.weight": "pytorch_model-00011.bin", "model.layers.18.attn.k_proj.weight": "pytorch_model-00011.bin", "model.layers.18.attn.o_proj.weight": "pytorch_model-00011.bin", "model.layers.18.attn.q_proj.weight": "pytorch_model-00011.bin", "model.layers.18.attn.v_proj.weight": "pytorch_model-00011.bin", "model.layers.18.pre_attn_norm.scale": "pytorch_model-00011.bin", "model.layers.18.post_attn_norm.scale": "pytorch_model-00011.bin", "model.layers.18.pre_moe_norm.scale": "pytorch_model-00011.bin", "model.layers.18.post_moe_norm.scale": "pytorch_model-00011.bin", "model.layers.18.moe_block.gate.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.0.linear.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.1.linear.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.2.linear.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.3.linear.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.4.linear.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.5.linear.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.6.linear.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.7.linear.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.0.linear_1.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.1.linear_1.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.2.linear_1.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.3.linear_1.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.4.linear_1.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.5.linear_1.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.6.linear_1.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.7.linear_1.weight": "pytorch_model-00011.bin", "model.layers.19.moe_block.experts.0.linear_v.weight": "pytorch_model-00012.bin", "model.layers.19.moe_block.experts.1.linear_v.weight": "pytorch_model-00012.bin", "model.layers.19.moe_block.experts.2.linear_v.weight": "pytorch_model-00012.bin", "model.layers.19.moe_block.experts.3.linear_v.weight": "pytorch_model-00012.bin", "model.layers.19.moe_block.experts.4.linear_v.weight": "pytorch_model-00012.bin", "model.layers.19.moe_block.experts.5.linear_v.weight": "pytorch_model-00012.bin", "model.layers.19.moe_block.experts.6.linear_v.weight": "pytorch_model-00012.bin", "model.layers.19.moe_block.experts.7.linear_v.weight": "pytorch_model-00012.bin", "model.layers.19.attn.k_proj.weight": "pytorch_model-00012.bin", "model.layers.19.attn.o_proj.weight": "pytorch_model-00012.bin", "model.layers.19.attn.q_proj.weight": "pytorch_model-00012.bin", "model.layers.19.attn.v_proj.weight": "pytorch_model-00012.bin", "model.layers.19.pre_attn_norm.scale": "pytorch_model-00012.bin", "model.layers.19.post_attn_norm.scale": "pytorch_model-00012.bin", "model.layers.19.pre_moe_norm.scale": "pytorch_model-00012.bin", "model.layers.19.post_moe_norm.scale": "pytorch_model-00012.bin", "model.layers.19.moe_block.gate.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.0.linear.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.1.linear.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.2.linear.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.3.linear.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.4.linear.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.5.linear.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.6.linear.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.7.linear.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.0.linear_1.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.1.linear_1.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.2.linear_1.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.3.linear_1.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.4.linear_1.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.5.linear_1.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.6.linear_1.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.7.linear_1.weight": "pytorch_model-00012.bin", "model.layers.2.moe_block.experts.0.linear_v.weight": "pytorch_model-00013.bin", "model.layers.2.moe_block.experts.1.linear_v.weight": "pytorch_model-00013.bin", "model.layers.2.moe_block.experts.2.linear_v.weight": "pytorch_model-00013.bin", "model.layers.2.moe_block.experts.3.linear_v.weight": "pytorch_model-00013.bin", "model.layers.2.moe_block.experts.4.linear_v.weight": "pytorch_model-00013.bin", "model.layers.2.moe_block.experts.5.linear_v.weight": "pytorch_model-00013.bin", "model.layers.2.moe_block.experts.6.linear_v.weight": "pytorch_model-00013.bin", "model.layers.2.moe_block.experts.7.linear_v.weight": "pytorch_model-00013.bin", "model.layers.2.attn.k_proj.weight": "pytorch_model-00013.bin", "model.layers.2.attn.o_proj.weight": "pytorch_model-00013.bin", "model.layers.2.attn.q_proj.weight": "pytorch_model-00013.bin", "model.layers.2.attn.v_proj.weight": "pytorch_model-00013.bin", "model.layers.2.pre_attn_norm.scale": "pytorch_model-00013.bin", "model.layers.2.post_attn_norm.scale": "pytorch_model-00013.bin", "model.layers.2.pre_moe_norm.scale": "pytorch_model-00013.bin", "model.layers.2.post_moe_norm.scale": "pytorch_model-00013.bin", "model.layers.2.moe_block.gate.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.0.linear.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.1.linear.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.2.linear.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.3.linear.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.4.linear.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.5.linear.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.6.linear.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.7.linear.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.0.linear_1.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.1.linear_1.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.2.linear_1.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.3.linear_1.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.4.linear_1.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.5.linear_1.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.6.linear_1.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.7.linear_1.weight": "pytorch_model-00013.bin", "model.layers.20.moe_block.experts.0.linear_v.weight": "pytorch_model-00014.bin", "model.layers.20.moe_block.experts.1.linear_v.weight": "pytorch_model-00014.bin", "model.layers.20.moe_block.experts.2.linear_v.weight": "pytorch_model-00014.bin", "model.layers.20.moe_block.experts.3.linear_v.weight": "pytorch_model-00014.bin", "model.layers.20.moe_block.experts.4.linear_v.weight": "pytorch_model-00014.bin", "model.layers.20.moe_block.experts.5.linear_v.weight": "pytorch_model-00014.bin", "model.layers.20.moe_block.experts.6.linear_v.weight": "pytorch_model-00014.bin", "model.layers.20.moe_block.experts.7.linear_v.weight": "pytorch_model-00014.bin", "model.layers.20.attn.k_proj.weight": "pytorch_model-00014.bin", "model.layers.20.attn.o_proj.weight": "pytorch_model-00014.bin", "model.layers.20.attn.q_proj.weight": "pytorch_model-00014.bin", "model.layers.20.attn.v_proj.weight": "pytorch_model-00014.bin", "model.layers.20.pre_attn_norm.scale": "pytorch_model-00014.bin", "model.layers.20.post_attn_norm.scale": "pytorch_model-00014.bin", "model.layers.20.pre_moe_norm.scale": "pytorch_model-00014.bin", "model.layers.20.post_moe_norm.scale": "pytorch_model-00014.bin", "model.layers.20.moe_block.gate.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.0.linear.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.1.linear.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.2.linear.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.3.linear.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.4.linear.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.5.linear.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.6.linear.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.7.linear.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.0.linear_1.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.1.linear_1.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.2.linear_1.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.3.linear_1.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.4.linear_1.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.5.linear_1.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.6.linear_1.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.7.linear_1.weight": "pytorch_model-00014.bin", "model.layers.21.moe_block.experts.0.linear_v.weight": "pytorch_model-00015.bin", "model.layers.21.moe_block.experts.1.linear_v.weight": "pytorch_model-00015.bin", "model.layers.21.moe_block.experts.2.linear_v.weight": "pytorch_model-00015.bin", "model.layers.21.moe_block.experts.3.linear_v.weight": "pytorch_model-00015.bin", "model.layers.21.moe_block.experts.4.linear_v.weight": "pytorch_model-00015.bin", "model.layers.21.moe_block.experts.5.linear_v.weight": "pytorch_model-00015.bin", "model.layers.21.moe_block.experts.6.linear_v.weight": "pytorch_model-00015.bin", "model.layers.21.moe_block.experts.7.linear_v.weight": "pytorch_model-00015.bin", "model.layers.21.attn.k_proj.weight": "pytorch_model-00015.bin", "model.layers.21.attn.o_proj.weight": "pytorch_model-00015.bin", "model.layers.21.attn.q_proj.weight": "pytorch_model-00015.bin", "model.layers.21.attn.v_proj.weight": "pytorch_model-00015.bin", "model.layers.21.pre_attn_norm.scale": "pytorch_model-00015.bin", "model.layers.21.post_attn_norm.scale": "pytorch_model-00015.bin", "model.layers.21.pre_moe_norm.scale": "pytorch_model-00015.bin", "model.layers.21.post_moe_norm.scale": "pytorch_model-00015.bin", "model.layers.21.moe_block.gate.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.0.linear.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.1.linear.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.2.linear.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.3.linear.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.4.linear.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.5.linear.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.6.linear.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.7.linear.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.0.linear_1.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.1.linear_1.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.2.linear_1.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.3.linear_1.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.4.linear_1.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.5.linear_1.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.6.linear_1.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.7.linear_1.weight": "pytorch_model-00015.bin", "model.layers.22.moe_block.experts.0.linear_v.weight": "pytorch_model-00016.bin", "model.layers.22.moe_block.experts.1.linear_v.weight": "pytorch_model-00016.bin", "model.layers.22.moe_block.experts.2.linear_v.weight": "pytorch_model-00016.bin", "model.layers.22.moe_block.experts.3.linear_v.weight": "pytorch_model-00016.bin", "model.layers.22.moe_block.experts.4.linear_v.weight": "pytorch_model-00016.bin", "model.layers.22.moe_block.experts.5.linear_v.weight": "pytorch_model-00016.bin", "model.layers.22.moe_block.experts.6.linear_v.weight": "pytorch_model-00016.bin", "model.layers.22.moe_block.experts.7.linear_v.weight": "pytorch_model-00016.bin", "model.layers.22.attn.k_proj.weight": "pytorch_model-00016.bin", "model.layers.22.attn.o_proj.weight": "pytorch_model-00016.bin", "model.layers.22.attn.q_proj.weight": "pytorch_model-00016.bin", "model.layers.22.attn.v_proj.weight": "pytorch_model-00016.bin", "model.layers.22.pre_attn_norm.scale": "pytorch_model-00016.bin", "model.layers.22.post_attn_norm.scale": "pytorch_model-00016.bin", "model.layers.22.pre_moe_norm.scale": "pytorch_model-00016.bin", "model.layers.22.post_moe_norm.scale": "pytorch_model-00016.bin", "model.layers.22.moe_block.gate.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.0.linear.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.1.linear.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.2.linear.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.3.linear.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.4.linear.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.5.linear.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.6.linear.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.7.linear.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.0.linear_1.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.1.linear_1.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.2.linear_1.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.3.linear_1.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.4.linear_1.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.5.linear_1.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.6.linear_1.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.7.linear_1.weight": "pytorch_model-00016.bin", "model.layers.23.moe_block.experts.0.linear_v.weight": "pytorch_model-00017.bin", "model.layers.23.moe_block.experts.1.linear_v.weight": "pytorch_model-00017.bin", "model.layers.23.moe_block.experts.2.linear_v.weight": "pytorch_model-00017.bin", "model.layers.23.moe_block.experts.3.linear_v.weight": "pytorch_model-00017.bin", "model.layers.23.moe_block.experts.4.linear_v.weight": "pytorch_model-00017.bin", "model.layers.23.moe_block.experts.5.linear_v.weight": "pytorch_model-00017.bin", "model.layers.23.moe_block.experts.6.linear_v.weight": "pytorch_model-00017.bin", "model.layers.23.moe_block.experts.7.linear_v.weight": "pytorch_model-00017.bin", "model.layers.23.attn.k_proj.weight": "pytorch_model-00017.bin", "model.layers.23.attn.o_proj.weight": "pytorch_model-00017.bin", "model.layers.23.attn.q_proj.weight": "pytorch_model-00017.bin", "model.layers.23.attn.v_proj.weight": "pytorch_model-00017.bin", "model.layers.23.pre_attn_norm.scale": "pytorch_model-00017.bin", "model.layers.23.post_attn_norm.scale": "pytorch_model-00017.bin", "model.layers.23.pre_moe_norm.scale": "pytorch_model-00017.bin", "model.layers.23.post_moe_norm.scale": "pytorch_model-00017.bin", "model.layers.23.moe_block.gate.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.0.linear.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.1.linear.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.2.linear.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.3.linear.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.4.linear.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.5.linear.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.6.linear.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.7.linear.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.0.linear_1.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.1.linear_1.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.2.linear_1.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.3.linear_1.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.4.linear_1.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.5.linear_1.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.6.linear_1.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.7.linear_1.weight": "pytorch_model-00017.bin", "model.layers.24.moe_block.experts.0.linear_v.weight": "pytorch_model-00018.bin", "model.layers.24.moe_block.experts.1.linear_v.weight": "pytorch_model-00018.bin", "model.layers.24.moe_block.experts.2.linear_v.weight": "pytorch_model-00018.bin", "model.layers.24.moe_block.experts.3.linear_v.weight": "pytorch_model-00018.bin", "model.layers.24.moe_block.experts.4.linear_v.weight": "pytorch_model-00018.bin", "model.layers.24.moe_block.experts.5.linear_v.weight": "pytorch_model-00018.bin", "model.layers.24.moe_block.experts.6.linear_v.weight": "pytorch_model-00018.bin", "model.layers.24.moe_block.experts.7.linear_v.weight": "pytorch_model-00018.bin", "model.layers.24.attn.k_proj.weight": "pytorch_model-00018.bin", "model.layers.24.attn.o_proj.weight": "pytorch_model-00018.bin", "model.layers.24.attn.q_proj.weight": "pytorch_model-00018.bin", "model.layers.24.attn.v_proj.weight": "pytorch_model-00018.bin", "model.layers.24.pre_attn_norm.scale": "pytorch_model-00018.bin", "model.layers.24.post_attn_norm.scale": "pytorch_model-00018.bin", "model.layers.24.pre_moe_norm.scale": "pytorch_model-00018.bin", "model.layers.24.post_moe_norm.scale": "pytorch_model-00018.bin", "model.layers.24.moe_block.gate.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.0.linear.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.1.linear.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.2.linear.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.3.linear.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.4.linear.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.5.linear.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.6.linear.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.7.linear.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.0.linear_1.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.1.linear_1.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.2.linear_1.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.3.linear_1.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.4.linear_1.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.5.linear_1.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.6.linear_1.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.7.linear_1.weight": "pytorch_model-00018.bin", "model.layers.25.moe_block.experts.0.linear_v.weight": "pytorch_model-00019.bin", "model.layers.25.moe_block.experts.1.linear_v.weight": "pytorch_model-00019.bin", "model.layers.25.moe_block.experts.2.linear_v.weight": "pytorch_model-00019.bin", "model.layers.25.moe_block.experts.3.linear_v.weight": "pytorch_model-00019.bin", "model.layers.25.moe_block.experts.4.linear_v.weight": "pytorch_model-00019.bin", "model.layers.25.moe_block.experts.5.linear_v.weight": "pytorch_model-00019.bin", "model.layers.25.moe_block.experts.6.linear_v.weight": "pytorch_model-00019.bin", "model.layers.25.moe_block.experts.7.linear_v.weight": "pytorch_model-00019.bin", "model.layers.25.attn.k_proj.weight": "pytorch_model-00019.bin", "model.layers.25.attn.o_proj.weight": "pytorch_model-00019.bin", "model.layers.25.attn.q_proj.weight": "pytorch_model-00019.bin", "model.layers.25.attn.v_proj.weight": "pytorch_model-00019.bin", "model.layers.25.pre_attn_norm.scale": "pytorch_model-00019.bin", "model.layers.25.post_attn_norm.scale": "pytorch_model-00019.bin", "model.layers.25.pre_moe_norm.scale": "pytorch_model-00019.bin", "model.layers.25.post_moe_norm.scale": "pytorch_model-00019.bin", "model.layers.25.moe_block.gate.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.0.linear.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.1.linear.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.2.linear.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.3.linear.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.4.linear.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.5.linear.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.6.linear.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.7.linear.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.0.linear_1.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.1.linear_1.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.2.linear_1.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.3.linear_1.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.4.linear_1.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.5.linear_1.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.6.linear_1.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.7.linear_1.weight": "pytorch_model-00019.bin", "model.layers.26.moe_block.experts.0.linear_v.weight": "pytorch_model-00020.bin", "model.layers.26.moe_block.experts.1.linear_v.weight": "pytorch_model-00020.bin", "model.layers.26.moe_block.experts.2.linear_v.weight": "pytorch_model-00020.bin", "model.layers.26.moe_block.experts.3.linear_v.weight": "pytorch_model-00020.bin", "model.layers.26.moe_block.experts.4.linear_v.weight": "pytorch_model-00020.bin", "model.layers.26.moe_block.experts.5.linear_v.weight": "pytorch_model-00020.bin", "model.layers.26.moe_block.experts.6.linear_v.weight": "pytorch_model-00020.bin", "model.layers.26.moe_block.experts.7.linear_v.weight": "pytorch_model-00020.bin", "model.layers.26.attn.k_proj.weight": "pytorch_model-00020.bin", "model.layers.26.attn.o_proj.weight": "pytorch_model-00020.bin", "model.layers.26.attn.q_proj.weight": "pytorch_model-00020.bin", "model.layers.26.attn.v_proj.weight": "pytorch_model-00020.bin", "model.layers.26.pre_attn_norm.scale": "pytorch_model-00020.bin", "model.layers.26.post_attn_norm.scale": "pytorch_model-00020.bin", "model.layers.26.pre_moe_norm.scale": "pytorch_model-00020.bin", "model.layers.26.post_moe_norm.scale": "pytorch_model-00020.bin", "model.layers.26.moe_block.gate.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.0.linear.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.1.linear.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.2.linear.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.3.linear.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.4.linear.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.5.linear.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.6.linear.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.7.linear.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.0.linear_1.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.1.linear_1.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.2.linear_1.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.3.linear_1.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.4.linear_1.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.5.linear_1.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.6.linear_1.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.7.linear_1.weight": "pytorch_model-00020.bin", "model.layers.27.moe_block.experts.0.linear_v.weight": "pytorch_model-00021.bin", "model.layers.27.moe_block.experts.1.linear_v.weight": "pytorch_model-00021.bin", "model.layers.27.moe_block.experts.2.linear_v.weight": "pytorch_model-00021.bin", "model.layers.27.moe_block.experts.3.linear_v.weight": "pytorch_model-00021.bin", "model.layers.27.moe_block.experts.4.linear_v.weight": "pytorch_model-00021.bin", "model.layers.27.moe_block.experts.5.linear_v.weight": "pytorch_model-00021.bin", "model.layers.27.moe_block.experts.6.linear_v.weight": "pytorch_model-00021.bin", "model.layers.27.moe_block.experts.7.linear_v.weight": "pytorch_model-00021.bin", "model.layers.27.attn.k_proj.weight": "pytorch_model-00021.bin", "model.layers.27.attn.o_proj.weight": "pytorch_model-00021.bin", "model.layers.27.attn.q_proj.weight": "pytorch_model-00021.bin", "model.layers.27.attn.v_proj.weight": "pytorch_model-00021.bin", "model.layers.27.pre_attn_norm.scale": "pytorch_model-00021.bin", "model.layers.27.post_attn_norm.scale": "pytorch_model-00021.bin", "model.layers.27.pre_moe_norm.scale": "pytorch_model-00021.bin", "model.layers.27.post_moe_norm.scale": "pytorch_model-00021.bin", "model.layers.27.moe_block.gate.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.0.linear.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.1.linear.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.2.linear.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.3.linear.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.4.linear.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.5.linear.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.6.linear.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.7.linear.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.0.linear_1.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.1.linear_1.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.2.linear_1.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.3.linear_1.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.4.linear_1.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.5.linear_1.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.6.linear_1.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.7.linear_1.weight": "pytorch_model-00021.bin", "model.layers.28.moe_block.experts.0.linear_v.weight": "pytorch_model-00022.bin", "model.layers.28.moe_block.experts.1.linear_v.weight": "pytorch_model-00022.bin", "model.layers.28.moe_block.experts.2.linear_v.weight": "pytorch_model-00022.bin", "model.layers.28.moe_block.experts.3.linear_v.weight": "pytorch_model-00022.bin", "model.layers.28.moe_block.experts.4.linear_v.weight": "pytorch_model-00022.bin", "model.layers.28.moe_block.experts.5.linear_v.weight": "pytorch_model-00022.bin", "model.layers.28.moe_block.experts.6.linear_v.weight": "pytorch_model-00022.bin", "model.layers.28.moe_block.experts.7.linear_v.weight": "pytorch_model-00022.bin", "model.layers.28.attn.k_proj.weight": "pytorch_model-00022.bin", "model.layers.28.attn.o_proj.weight": "pytorch_model-00022.bin", "model.layers.28.attn.q_proj.weight": "pytorch_model-00022.bin", "model.layers.28.attn.v_proj.weight": "pytorch_model-00022.bin", "model.layers.28.pre_attn_norm.scale": "pytorch_model-00022.bin", "model.layers.28.post_attn_norm.scale": "pytorch_model-00022.bin", "model.layers.28.pre_moe_norm.scale": "pytorch_model-00022.bin", "model.layers.28.post_moe_norm.scale": "pytorch_model-00022.bin", "model.layers.28.moe_block.gate.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.0.linear.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.1.linear.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.2.linear.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.3.linear.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.4.linear.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.5.linear.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.6.linear.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.7.linear.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.0.linear_1.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.1.linear_1.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.2.linear_1.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.3.linear_1.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.4.linear_1.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.5.linear_1.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.6.linear_1.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.7.linear_1.weight": "pytorch_model-00022.bin", "model.layers.29.moe_block.experts.0.linear_v.weight": "pytorch_model-00023.bin", "model.layers.29.moe_block.experts.1.linear_v.weight": "pytorch_model-00023.bin", "model.layers.29.moe_block.experts.2.linear_v.weight": "pytorch_model-00023.bin", "model.layers.29.moe_block.experts.3.linear_v.weight": "pytorch_model-00023.bin", "model.layers.29.moe_block.experts.4.linear_v.weight": "pytorch_model-00023.bin", "model.layers.29.moe_block.experts.5.linear_v.weight": "pytorch_model-00023.bin", "model.layers.29.moe_block.experts.6.linear_v.weight": "pytorch_model-00023.bin", "model.layers.29.moe_block.experts.7.linear_v.weight": "pytorch_model-00023.bin", "model.layers.29.attn.k_proj.weight": "pytorch_model-00023.bin", "model.layers.29.attn.o_proj.weight": "pytorch_model-00023.bin", "model.layers.29.attn.q_proj.weight": "pytorch_model-00023.bin", "model.layers.29.attn.v_proj.weight": "pytorch_model-00023.bin", "model.layers.29.pre_attn_norm.scale": "pytorch_model-00023.bin", "model.layers.29.post_attn_norm.scale": "pytorch_model-00023.bin", "model.layers.29.pre_moe_norm.scale": "pytorch_model-00023.bin", "model.layers.29.post_moe_norm.scale": "pytorch_model-00023.bin", "model.layers.29.moe_block.gate.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.0.linear.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.1.linear.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.2.linear.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.3.linear.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.4.linear.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.5.linear.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.6.linear.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.7.linear.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.0.linear_1.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.1.linear_1.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.2.linear_1.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.3.linear_1.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.4.linear_1.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.5.linear_1.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.6.linear_1.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.7.linear_1.weight": "pytorch_model-00023.bin", "model.layers.3.moe_block.experts.0.linear_v.weight": "pytorch_model-00024.bin", "model.layers.3.moe_block.experts.1.linear_v.weight": "pytorch_model-00024.bin", "model.layers.3.moe_block.experts.2.linear_v.weight": "pytorch_model-00024.bin", "model.layers.3.moe_block.experts.3.linear_v.weight": "pytorch_model-00024.bin", "model.layers.3.moe_block.experts.4.linear_v.weight": "pytorch_model-00024.bin", "model.layers.3.moe_block.experts.5.linear_v.weight": "pytorch_model-00024.bin", "model.layers.3.moe_block.experts.6.linear_v.weight": "pytorch_model-00024.bin", "model.layers.3.moe_block.experts.7.linear_v.weight": "pytorch_model-00024.bin", "model.layers.3.attn.k_proj.weight": "pytorch_model-00024.bin", "model.layers.3.attn.o_proj.weight": "pytorch_model-00024.bin", "model.layers.3.attn.q_proj.weight": "pytorch_model-00024.bin", "model.layers.3.attn.v_proj.weight": "pytorch_model-00024.bin", "model.layers.3.pre_attn_norm.scale": "pytorch_model-00024.bin", "model.layers.3.post_attn_norm.scale": "pytorch_model-00024.bin", "model.layers.3.pre_moe_norm.scale": "pytorch_model-00024.bin", "model.layers.3.post_moe_norm.scale": "pytorch_model-00024.bin", "model.layers.3.moe_block.gate.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.0.linear.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.1.linear.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.2.linear.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.3.linear.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.4.linear.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.5.linear.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.6.linear.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.7.linear.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.0.linear_1.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.1.linear_1.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.2.linear_1.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.3.linear_1.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.4.linear_1.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.5.linear_1.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.6.linear_1.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.7.linear_1.weight": "pytorch_model-00024.bin", "model.layers.30.moe_block.experts.0.linear_v.weight": "pytorch_model-00025.bin", "model.layers.30.moe_block.experts.1.linear_v.weight": "pytorch_model-00025.bin", "model.layers.30.moe_block.experts.2.linear_v.weight": "pytorch_model-00025.bin", "model.layers.30.moe_block.experts.3.linear_v.weight": "pytorch_model-00025.bin", "model.layers.30.moe_block.experts.4.linear_v.weight": "pytorch_model-00025.bin", "model.layers.30.moe_block.experts.5.linear_v.weight": "pytorch_model-00025.bin", "model.layers.30.moe_block.experts.6.linear_v.weight": "pytorch_model-00025.bin", "model.layers.30.moe_block.experts.7.linear_v.weight": "pytorch_model-00025.bin", "model.layers.30.attn.k_proj.weight": "pytorch_model-00025.bin", "model.layers.30.attn.o_proj.weight": "pytorch_model-00025.bin", "model.layers.30.attn.q_proj.weight": "pytorch_model-00025.bin", "model.layers.30.attn.v_proj.weight": "pytorch_model-00025.bin", "model.layers.30.pre_attn_norm.scale": "pytorch_model-00025.bin", "model.layers.30.post_attn_norm.scale": "pytorch_model-00025.bin", "model.layers.30.pre_moe_norm.scale": "pytorch_model-00025.bin", "model.layers.30.post_moe_norm.scale": "pytorch_model-00025.bin", "model.layers.30.moe_block.gate.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.0.linear.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.1.linear.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.2.linear.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.3.linear.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.4.linear.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.5.linear.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.6.linear.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.7.linear.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.0.linear_1.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.1.linear_1.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.2.linear_1.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.3.linear_1.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.4.linear_1.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.5.linear_1.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.6.linear_1.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.7.linear_1.weight": "pytorch_model-00025.bin", "model.layers.31.moe_block.experts.0.linear_v.weight": "pytorch_model-00026.bin", "model.layers.31.moe_block.experts.1.linear_v.weight": "pytorch_model-00026.bin", "model.layers.31.moe_block.experts.2.linear_v.weight": "pytorch_model-00026.bin", "model.layers.31.moe_block.experts.3.linear_v.weight": "pytorch_model-00026.bin", "model.layers.31.moe_block.experts.4.linear_v.weight": "pytorch_model-00026.bin", "model.layers.31.moe_block.experts.5.linear_v.weight": "pytorch_model-00026.bin", "model.layers.31.moe_block.experts.6.linear_v.weight": "pytorch_model-00026.bin", "model.layers.31.moe_block.experts.7.linear_v.weight": "pytorch_model-00026.bin", "model.layers.31.attn.k_proj.weight": "pytorch_model-00026.bin", "model.layers.31.attn.o_proj.weight": "pytorch_model-00026.bin", "model.layers.31.attn.q_proj.weight": "pytorch_model-00026.bin", "model.layers.31.attn.v_proj.weight": "pytorch_model-00026.bin", "model.layers.31.pre_attn_norm.scale": "pytorch_model-00026.bin", "model.layers.31.post_attn_norm.scale": "pytorch_model-00026.bin", "model.layers.31.pre_moe_norm.scale": "pytorch_model-00026.bin", "model.layers.31.post_moe_norm.scale": "pytorch_model-00026.bin", "model.layers.31.moe_block.gate.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.0.linear.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.1.linear.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.2.linear.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.3.linear.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.4.linear.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.5.linear.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.6.linear.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.7.linear.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.0.linear_1.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.1.linear_1.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.2.linear_1.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.3.linear_1.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.4.linear_1.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.5.linear_1.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.6.linear_1.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.7.linear_1.weight": "pytorch_model-00026.bin", "model.layers.32.moe_block.experts.0.linear_v.weight": "pytorch_model-00027.bin", "model.layers.32.moe_block.experts.1.linear_v.weight": "pytorch_model-00027.bin", "model.layers.32.moe_block.experts.2.linear_v.weight": "pytorch_model-00027.bin", "model.layers.32.moe_block.experts.3.linear_v.weight": "pytorch_model-00027.bin", "model.layers.32.moe_block.experts.4.linear_v.weight": "pytorch_model-00027.bin", "model.layers.32.moe_block.experts.5.linear_v.weight": "pytorch_model-00027.bin", "model.layers.32.moe_block.experts.6.linear_v.weight": "pytorch_model-00027.bin", "model.layers.32.moe_block.experts.7.linear_v.weight": "pytorch_model-00027.bin", "model.layers.32.attn.k_proj.weight": "pytorch_model-00027.bin", "model.layers.32.attn.o_proj.weight": "pytorch_model-00027.bin", "model.layers.32.attn.q_proj.weight": "pytorch_model-00027.bin", "model.layers.32.attn.v_proj.weight": "pytorch_model-00027.bin", "model.layers.32.pre_attn_norm.scale": "pytorch_model-00027.bin", "model.layers.32.post_attn_norm.scale": "pytorch_model-00027.bin", "model.layers.32.pre_moe_norm.scale": "pytorch_model-00027.bin", "model.layers.32.post_moe_norm.scale": "pytorch_model-00027.bin", "model.layers.32.moe_block.gate.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.0.linear.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.1.linear.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.2.linear.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.3.linear.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.4.linear.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.5.linear.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.6.linear.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.7.linear.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.0.linear_1.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.1.linear_1.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.2.linear_1.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.3.linear_1.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.4.linear_1.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.5.linear_1.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.6.linear_1.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.7.linear_1.weight": "pytorch_model-00027.bin", "model.layers.33.moe_block.experts.0.linear_v.weight": "pytorch_model-00028.bin", "model.layers.33.moe_block.experts.1.linear_v.weight": "pytorch_model-00028.bin", "model.layers.33.moe_block.experts.2.linear_v.weight": "pytorch_model-00028.bin", "model.layers.33.moe_block.experts.3.linear_v.weight": "pytorch_model-00028.bin", "model.layers.33.moe_block.experts.4.linear_v.weight": "pytorch_model-00028.bin", "model.layers.33.moe_block.experts.5.linear_v.weight": "pytorch_model-00028.bin", "model.layers.33.moe_block.experts.6.linear_v.weight": "pytorch_model-00028.bin", "model.layers.33.moe_block.experts.7.linear_v.weight": "pytorch_model-00028.bin", "model.layers.33.attn.k_proj.weight": "pytorch_model-00028.bin", "model.layers.33.attn.o_proj.weight": "pytorch_model-00028.bin", "model.layers.33.attn.q_proj.weight": "pytorch_model-00028.bin", "model.layers.33.attn.v_proj.weight": "pytorch_model-00028.bin", "model.layers.33.pre_attn_norm.scale": "pytorch_model-00028.bin", "model.layers.33.post_attn_norm.scale": "pytorch_model-00028.bin", "model.layers.33.pre_moe_norm.scale": "pytorch_model-00028.bin", "model.layers.33.post_moe_norm.scale": "pytorch_model-00028.bin", "model.layers.33.moe_block.gate.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.0.linear.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.1.linear.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.2.linear.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.3.linear.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.4.linear.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.5.linear.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.6.linear.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.7.linear.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.0.linear_1.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.1.linear_1.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.2.linear_1.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.3.linear_1.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.4.linear_1.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.5.linear_1.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.6.linear_1.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.7.linear_1.weight": "pytorch_model-00028.bin", "model.layers.34.moe_block.experts.0.linear_v.weight": "pytorch_model-00029.bin", "model.layers.34.moe_block.experts.1.linear_v.weight": "pytorch_model-00029.bin", "model.layers.34.moe_block.experts.2.linear_v.weight": "pytorch_model-00029.bin", "model.layers.34.moe_block.experts.3.linear_v.weight": "pytorch_model-00029.bin", "model.layers.34.moe_block.experts.4.linear_v.weight": "pytorch_model-00029.bin", "model.layers.34.moe_block.experts.5.linear_v.weight": "pytorch_model-00029.bin", "model.layers.34.moe_block.experts.6.linear_v.weight": "pytorch_model-00029.bin", "model.layers.34.moe_block.experts.7.linear_v.weight": "pytorch_model-00029.bin", "model.layers.34.attn.k_proj.weight": "pytorch_model-00029.bin", "model.layers.34.attn.o_proj.weight": "pytorch_model-00029.bin", "model.layers.34.attn.q_proj.weight": "pytorch_model-00029.bin", "model.layers.34.attn.v_proj.weight": "pytorch_model-00029.bin", "model.layers.34.pre_attn_norm.scale": "pytorch_model-00029.bin", "model.layers.34.post_attn_norm.scale": "pytorch_model-00029.bin", "model.layers.34.pre_moe_norm.scale": "pytorch_model-00029.bin", "model.layers.34.post_moe_norm.scale": "pytorch_model-00029.bin", "model.layers.34.moe_block.gate.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.0.linear.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.1.linear.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.2.linear.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.3.linear.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.4.linear.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.5.linear.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.6.linear.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.7.linear.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.0.linear_1.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.1.linear_1.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.2.linear_1.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.3.linear_1.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.4.linear_1.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.5.linear_1.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.6.linear_1.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.7.linear_1.weight": "pytorch_model-00029.bin", "model.layers.35.moe_block.experts.0.linear_v.weight": "pytorch_model-00030.bin", "model.layers.35.moe_block.experts.1.linear_v.weight": "pytorch_model-00030.bin", "model.layers.35.moe_block.experts.2.linear_v.weight": "pytorch_model-00030.bin", "model.layers.35.moe_block.experts.3.linear_v.weight": "pytorch_model-00030.bin", "model.layers.35.moe_block.experts.4.linear_v.weight": "pytorch_model-00030.bin", "model.layers.35.moe_block.experts.5.linear_v.weight": "pytorch_model-00030.bin", "model.layers.35.moe_block.experts.6.linear_v.weight": "pytorch_model-00030.bin", "model.layers.35.moe_block.experts.7.linear_v.weight": "pytorch_model-00030.bin", "model.layers.35.attn.k_proj.weight": "pytorch_model-00030.bin", "model.layers.35.attn.o_proj.weight": "pytorch_model-00030.bin", "model.layers.35.attn.q_proj.weight": "pytorch_model-00030.bin", "model.layers.35.attn.v_proj.weight": "pytorch_model-00030.bin", "model.layers.35.pre_attn_norm.scale": "pytorch_model-00030.bin", "model.layers.35.post_attn_norm.scale": "pytorch_model-00030.bin", "model.layers.35.pre_moe_norm.scale": "pytorch_model-00030.bin", "model.layers.35.post_moe_norm.scale": "pytorch_model-00030.bin", "model.layers.35.moe_block.gate.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.0.linear.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.1.linear.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.2.linear.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.3.linear.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.4.linear.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.5.linear.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.6.linear.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.7.linear.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.0.linear_1.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.1.linear_1.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.2.linear_1.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.3.linear_1.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.4.linear_1.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.5.linear_1.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.6.linear_1.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.7.linear_1.weight": "pytorch_model-00030.bin", "model.layers.36.moe_block.experts.0.linear_v.weight": "pytorch_model-00031.bin", "model.layers.36.moe_block.experts.1.linear_v.weight": "pytorch_model-00031.bin", "model.layers.36.moe_block.experts.2.linear_v.weight": "pytorch_model-00031.bin", "model.layers.36.moe_block.experts.3.linear_v.weight": "pytorch_model-00031.bin", "model.layers.36.moe_block.experts.4.linear_v.weight": "pytorch_model-00031.bin", "model.layers.36.moe_block.experts.5.linear_v.weight": "pytorch_model-00031.bin", "model.layers.36.moe_block.experts.6.linear_v.weight": "pytorch_model-00031.bin", "model.layers.36.moe_block.experts.7.linear_v.weight": "pytorch_model-00031.bin", "model.layers.36.attn.k_proj.weight": "pytorch_model-00031.bin", "model.layers.36.attn.o_proj.weight": "pytorch_model-00031.bin", "model.layers.36.attn.q_proj.weight": "pytorch_model-00031.bin", "model.layers.36.attn.v_proj.weight": "pytorch_model-00031.bin", "model.layers.36.pre_attn_norm.scale": "pytorch_model-00031.bin", "model.layers.36.post_attn_norm.scale": "pytorch_model-00031.bin", "model.layers.36.pre_moe_norm.scale": "pytorch_model-00031.bin", "model.layers.36.post_moe_norm.scale": "pytorch_model-00031.bin", "model.layers.36.moe_block.gate.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.0.linear.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.1.linear.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.2.linear.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.3.linear.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.4.linear.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.5.linear.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.6.linear.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.7.linear.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.0.linear_1.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.1.linear_1.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.2.linear_1.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.3.linear_1.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.4.linear_1.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.5.linear_1.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.6.linear_1.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.7.linear_1.weight": "pytorch_model-00031.bin", "model.layers.37.moe_block.experts.0.linear_v.weight": "pytorch_model-00032.bin", "model.layers.37.moe_block.experts.1.linear_v.weight": "pytorch_model-00032.bin", "model.layers.37.moe_block.experts.2.linear_v.weight": "pytorch_model-00032.bin", "model.layers.37.moe_block.experts.3.linear_v.weight": "pytorch_model-00032.bin", "model.layers.37.moe_block.experts.4.linear_v.weight": "pytorch_model-00032.bin", "model.layers.37.moe_block.experts.5.linear_v.weight": "pytorch_model-00032.bin", "model.layers.37.moe_block.experts.6.linear_v.weight": "pytorch_model-00032.bin", "model.layers.37.moe_block.experts.7.linear_v.weight": "pytorch_model-00032.bin", "model.layers.37.attn.k_proj.weight": "pytorch_model-00032.bin", "model.layers.37.attn.o_proj.weight": "pytorch_model-00032.bin", "model.layers.37.attn.q_proj.weight": "pytorch_model-00032.bin", "model.layers.37.attn.v_proj.weight": "pytorch_model-00032.bin", "model.layers.37.pre_attn_norm.scale": "pytorch_model-00032.bin", "model.layers.37.post_attn_norm.scale": "pytorch_model-00032.bin", "model.layers.37.pre_moe_norm.scale": "pytorch_model-00032.bin", "model.layers.37.post_moe_norm.scale": "pytorch_model-00032.bin", "model.layers.37.moe_block.gate.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.0.linear.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.1.linear.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.2.linear.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.3.linear.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.4.linear.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.5.linear.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.6.linear.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.7.linear.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.0.linear_1.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.1.linear_1.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.2.linear_1.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.3.linear_1.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.4.linear_1.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.5.linear_1.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.6.linear_1.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.7.linear_1.weight": "pytorch_model-00032.bin", "model.layers.38.moe_block.experts.0.linear_v.weight": "pytorch_model-00033.bin", "model.layers.38.moe_block.experts.1.linear_v.weight": "pytorch_model-00033.bin", "model.layers.38.moe_block.experts.2.linear_v.weight": "pytorch_model-00033.bin", "model.layers.38.moe_block.experts.3.linear_v.weight": "pytorch_model-00033.bin", "model.layers.38.moe_block.experts.4.linear_v.weight": "pytorch_model-00033.bin", "model.layers.38.moe_block.experts.5.linear_v.weight": "pytorch_model-00033.bin", "model.layers.38.moe_block.experts.6.linear_v.weight": "pytorch_model-00033.bin", "model.layers.38.moe_block.experts.7.linear_v.weight": "pytorch_model-00033.bin", "model.layers.38.attn.k_proj.weight": "pytorch_model-00033.bin", "model.layers.38.attn.o_proj.weight": "pytorch_model-00033.bin", "model.layers.38.attn.q_proj.weight": "pytorch_model-00033.bin", "model.layers.38.attn.v_proj.weight": "pytorch_model-00033.bin", "model.layers.38.pre_attn_norm.scale": "pytorch_model-00033.bin", "model.layers.38.post_attn_norm.scale": "pytorch_model-00033.bin", "model.layers.38.pre_moe_norm.scale": "pytorch_model-00033.bin", "model.layers.38.post_moe_norm.scale": "pytorch_model-00033.bin", "model.layers.38.moe_block.gate.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.0.linear.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.1.linear.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.2.linear.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.3.linear.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.4.linear.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.5.linear.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.6.linear.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.7.linear.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.0.linear_1.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.1.linear_1.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.2.linear_1.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.3.linear_1.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.4.linear_1.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.5.linear_1.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.6.linear_1.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.7.linear_1.weight": "pytorch_model-00033.bin", "model.layers.39.moe_block.experts.0.linear_v.weight": "pytorch_model-00034.bin", "model.layers.39.moe_block.experts.1.linear_v.weight": "pytorch_model-00034.bin", "model.layers.39.moe_block.experts.2.linear_v.weight": "pytorch_model-00034.bin", "model.layers.39.moe_block.experts.3.linear_v.weight": "pytorch_model-00034.bin", "model.layers.39.moe_block.experts.4.linear_v.weight": "pytorch_model-00034.bin", "model.layers.39.moe_block.experts.5.linear_v.weight": "pytorch_model-00034.bin", "model.layers.39.moe_block.experts.6.linear_v.weight": "pytorch_model-00034.bin", "model.layers.39.moe_block.experts.7.linear_v.weight": "pytorch_model-00034.bin", "model.layers.39.attn.k_proj.weight": "pytorch_model-00034.bin", "model.layers.39.attn.o_proj.weight": "pytorch_model-00034.bin", "model.layers.39.attn.q_proj.weight": "pytorch_model-00034.bin", "model.layers.39.attn.v_proj.weight": "pytorch_model-00034.bin", "model.layers.39.pre_attn_norm.scale": "pytorch_model-00034.bin", "model.layers.39.post_attn_norm.scale": "pytorch_model-00034.bin", "model.layers.39.pre_moe_norm.scale": "pytorch_model-00034.bin", "model.layers.39.post_moe_norm.scale": "pytorch_model-00034.bin", "model.layers.39.moe_block.gate.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.0.linear.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.1.linear.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.2.linear.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.3.linear.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.4.linear.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.5.linear.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.6.linear.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.7.linear.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.0.linear_1.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.1.linear_1.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.2.linear_1.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.3.linear_1.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.4.linear_1.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.5.linear_1.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.6.linear_1.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.7.linear_1.weight": "pytorch_model-00034.bin", "model.layers.4.moe_block.experts.0.linear_v.weight": "pytorch_model-00035.bin", "model.layers.4.moe_block.experts.1.linear_v.weight": "pytorch_model-00035.bin", "model.layers.4.moe_block.experts.2.linear_v.weight": "pytorch_model-00035.bin", "model.layers.4.moe_block.experts.3.linear_v.weight": "pytorch_model-00035.bin", "model.layers.4.moe_block.experts.4.linear_v.weight": "pytorch_model-00035.bin", "model.layers.4.moe_block.experts.5.linear_v.weight": "pytorch_model-00035.bin", "model.layers.4.moe_block.experts.6.linear_v.weight": "pytorch_model-00035.bin", "model.layers.4.moe_block.experts.7.linear_v.weight": "pytorch_model-00035.bin", "model.layers.4.attn.k_proj.weight": "pytorch_model-00035.bin", "model.layers.4.attn.o_proj.weight": "pytorch_model-00035.bin", "model.layers.4.attn.q_proj.weight": "pytorch_model-00035.bin", "model.layers.4.attn.v_proj.weight": "pytorch_model-00035.bin", "model.layers.4.pre_attn_norm.scale": "pytorch_model-00035.bin", "model.layers.4.post_attn_norm.scale": "pytorch_model-00035.bin", "model.layers.4.pre_moe_norm.scale": "pytorch_model-00035.bin", "model.layers.4.post_moe_norm.scale": "pytorch_model-00035.bin", "model.layers.4.moe_block.gate.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.0.linear.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.1.linear.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.2.linear.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.3.linear.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.4.linear.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.5.linear.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.6.linear.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.7.linear.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.0.linear_1.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.1.linear_1.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.2.linear_1.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.3.linear_1.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.4.linear_1.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.5.linear_1.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.6.linear_1.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.7.linear_1.weight": "pytorch_model-00035.bin", "model.layers.40.moe_block.experts.0.linear_v.weight": "pytorch_model-00036.bin", "model.layers.40.moe_block.experts.1.linear_v.weight": "pytorch_model-00036.bin", "model.layers.40.moe_block.experts.2.linear_v.weight": "pytorch_model-00036.bin", "model.layers.40.moe_block.experts.3.linear_v.weight": "pytorch_model-00036.bin", "model.layers.40.moe_block.experts.4.linear_v.weight": "pytorch_model-00036.bin", "model.layers.40.moe_block.experts.5.linear_v.weight": "pytorch_model-00036.bin", "model.layers.40.moe_block.experts.6.linear_v.weight": "pytorch_model-00036.bin", "model.layers.40.moe_block.experts.7.linear_v.weight": "pytorch_model-00036.bin", "model.layers.40.attn.k_proj.weight": "pytorch_model-00036.bin", "model.layers.40.attn.o_proj.weight": "pytorch_model-00036.bin", "model.layers.40.attn.q_proj.weight": "pytorch_model-00036.bin", "model.layers.40.attn.v_proj.weight": "pytorch_model-00036.bin", "model.layers.40.pre_attn_norm.scale": "pytorch_model-00036.bin", "model.layers.40.post_attn_norm.scale": "pytorch_model-00036.bin", "model.layers.40.pre_moe_norm.scale": "pytorch_model-00036.bin", "model.layers.40.post_moe_norm.scale": "pytorch_model-00036.bin", "model.layers.40.moe_block.gate.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.0.linear.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.1.linear.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.2.linear.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.3.linear.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.4.linear.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.5.linear.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.6.linear.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.7.linear.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.0.linear_1.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.1.linear_1.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.2.linear_1.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.3.linear_1.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.4.linear_1.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.5.linear_1.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.6.linear_1.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.7.linear_1.weight": "pytorch_model-00036.bin", "model.layers.41.moe_block.experts.0.linear_v.weight": "pytorch_model-00037.bin", "model.layers.41.moe_block.experts.1.linear_v.weight": "pytorch_model-00037.bin", "model.layers.41.moe_block.experts.2.linear_v.weight": "pytorch_model-00037.bin", "model.layers.41.moe_block.experts.3.linear_v.weight": "pytorch_model-00037.bin", "model.layers.41.moe_block.experts.4.linear_v.weight": "pytorch_model-00037.bin", "model.layers.41.moe_block.experts.5.linear_v.weight": "pytorch_model-00037.bin", "model.layers.41.moe_block.experts.6.linear_v.weight": "pytorch_model-00037.bin", "model.layers.41.moe_block.experts.7.linear_v.weight": "pytorch_model-00037.bin", "model.layers.41.attn.k_proj.weight": "pytorch_model-00037.bin", "model.layers.41.attn.o_proj.weight": "pytorch_model-00037.bin", "model.layers.41.attn.q_proj.weight": "pytorch_model-00037.bin", "model.layers.41.attn.v_proj.weight": "pytorch_model-00037.bin", "model.layers.41.pre_attn_norm.scale": "pytorch_model-00037.bin", "model.layers.41.post_attn_norm.scale": "pytorch_model-00037.bin", "model.layers.41.pre_moe_norm.scale": "pytorch_model-00037.bin", "model.layers.41.post_moe_norm.scale": "pytorch_model-00037.bin", "model.layers.41.moe_block.gate.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.0.linear.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.1.linear.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.2.linear.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.3.linear.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.4.linear.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.5.linear.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.6.linear.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.7.linear.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.0.linear_1.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.1.linear_1.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.2.linear_1.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.3.linear_1.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.4.linear_1.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.5.linear_1.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.6.linear_1.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.7.linear_1.weight": "pytorch_model-00037.bin", "model.layers.42.moe_block.experts.0.linear_v.weight": "pytorch_model-00038.bin", "model.layers.42.moe_block.experts.1.linear_v.weight": "pytorch_model-00038.bin", "model.layers.42.moe_block.experts.2.linear_v.weight": "pytorch_model-00038.bin", "model.layers.42.moe_block.experts.3.linear_v.weight": "pytorch_model-00038.bin", "model.layers.42.moe_block.experts.4.linear_v.weight": "pytorch_model-00038.bin", "model.layers.42.moe_block.experts.5.linear_v.weight": "pytorch_model-00038.bin", "model.layers.42.moe_block.experts.6.linear_v.weight": "pytorch_model-00038.bin", "model.layers.42.moe_block.experts.7.linear_v.weight": "pytorch_model-00038.bin", "model.layers.42.attn.k_proj.weight": "pytorch_model-00038.bin", "model.layers.42.attn.o_proj.weight": "pytorch_model-00038.bin", "model.layers.42.attn.q_proj.weight": "pytorch_model-00038.bin", "model.layers.42.attn.v_proj.weight": "pytorch_model-00038.bin", "model.layers.42.pre_attn_norm.scale": "pytorch_model-00038.bin", "model.layers.42.post_attn_norm.scale": "pytorch_model-00038.bin", "model.layers.42.pre_moe_norm.scale": "pytorch_model-00038.bin", "model.layers.42.post_moe_norm.scale": "pytorch_model-00038.bin", "model.layers.42.moe_block.gate.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.0.linear.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.1.linear.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.2.linear.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.3.linear.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.4.linear.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.5.linear.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.6.linear.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.7.linear.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.0.linear_1.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.1.linear_1.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.2.linear_1.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.3.linear_1.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.4.linear_1.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.5.linear_1.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.6.linear_1.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.7.linear_1.weight": "pytorch_model-00038.bin", "model.layers.43.moe_block.experts.0.linear_v.weight": "pytorch_model-00039.bin", "model.layers.43.moe_block.experts.1.linear_v.weight": "pytorch_model-00039.bin", "model.layers.43.moe_block.experts.2.linear_v.weight": "pytorch_model-00039.bin", "model.layers.43.moe_block.experts.3.linear_v.weight": "pytorch_model-00039.bin", "model.layers.43.moe_block.experts.4.linear_v.weight": "pytorch_model-00039.bin", "model.layers.43.moe_block.experts.5.linear_v.weight": "pytorch_model-00039.bin", "model.layers.43.moe_block.experts.6.linear_v.weight": "pytorch_model-00039.bin", "model.layers.43.moe_block.experts.7.linear_v.weight": "pytorch_model-00039.bin", "model.layers.43.attn.k_proj.weight": "pytorch_model-00039.bin", "model.layers.43.attn.o_proj.weight": "pytorch_model-00039.bin", "model.layers.43.attn.q_proj.weight": "pytorch_model-00039.bin", "model.layers.43.attn.v_proj.weight": "pytorch_model-00039.bin", "model.layers.43.pre_attn_norm.scale": "pytorch_model-00039.bin", "model.layers.43.post_attn_norm.scale": "pytorch_model-00039.bin", "model.layers.43.pre_moe_norm.scale": "pytorch_model-00039.bin", "model.layers.43.post_moe_norm.scale": "pytorch_model-00039.bin", "model.layers.43.moe_block.gate.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.0.linear.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.1.linear.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.2.linear.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.3.linear.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.4.linear.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.5.linear.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.6.linear.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.7.linear.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.0.linear_1.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.1.linear_1.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.2.linear_1.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.3.linear_1.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.4.linear_1.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.5.linear_1.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.6.linear_1.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.7.linear_1.weight": "pytorch_model-00039.bin", "model.layers.44.moe_block.experts.0.linear_v.weight": "pytorch_model-00040.bin", "model.layers.44.moe_block.experts.1.linear_v.weight": "pytorch_model-00040.bin", "model.layers.44.moe_block.experts.2.linear_v.weight": "pytorch_model-00040.bin", "model.layers.44.moe_block.experts.3.linear_v.weight": "pytorch_model-00040.bin", "model.layers.44.moe_block.experts.4.linear_v.weight": "pytorch_model-00040.bin", "model.layers.44.moe_block.experts.5.linear_v.weight": "pytorch_model-00040.bin", "model.layers.44.moe_block.experts.6.linear_v.weight": "pytorch_model-00040.bin", "model.layers.44.moe_block.experts.7.linear_v.weight": "pytorch_model-00040.bin", "model.layers.44.attn.k_proj.weight": "pytorch_model-00040.bin", "model.layers.44.attn.o_proj.weight": "pytorch_model-00040.bin", "model.layers.44.attn.q_proj.weight": "pytorch_model-00040.bin", "model.layers.44.attn.v_proj.weight": "pytorch_model-00040.bin", "model.layers.44.pre_attn_norm.scale": "pytorch_model-00040.bin", "model.layers.44.post_attn_norm.scale": "pytorch_model-00040.bin", "model.layers.44.pre_moe_norm.scale": "pytorch_model-00040.bin", "model.layers.44.post_moe_norm.scale": "pytorch_model-00040.bin", "model.layers.44.moe_block.gate.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.0.linear.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.1.linear.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.2.linear.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.3.linear.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.4.linear.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.5.linear.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.6.linear.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.7.linear.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.0.linear_1.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.1.linear_1.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.2.linear_1.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.3.linear_1.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.4.linear_1.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.5.linear_1.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.6.linear_1.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.7.linear_1.weight": "pytorch_model-00040.bin", "model.layers.45.moe_block.experts.0.linear_v.weight": "pytorch_model-00041.bin", "model.layers.45.moe_block.experts.1.linear_v.weight": "pytorch_model-00041.bin", "model.layers.45.moe_block.experts.2.linear_v.weight": "pytorch_model-00041.bin", "model.layers.45.moe_block.experts.3.linear_v.weight": "pytorch_model-00041.bin", "model.layers.45.moe_block.experts.4.linear_v.weight": "pytorch_model-00041.bin", "model.layers.45.moe_block.experts.5.linear_v.weight": "pytorch_model-00041.bin", "model.layers.45.moe_block.experts.6.linear_v.weight": "pytorch_model-00041.bin", "model.layers.45.moe_block.experts.7.linear_v.weight": "pytorch_model-00041.bin", "model.layers.45.attn.k_proj.weight": "pytorch_model-00041.bin", "model.layers.45.attn.o_proj.weight": "pytorch_model-00041.bin", "model.layers.45.attn.q_proj.weight": "pytorch_model-00041.bin", "model.layers.45.attn.v_proj.weight": "pytorch_model-00041.bin", "model.layers.45.pre_attn_norm.scale": "pytorch_model-00041.bin", "model.layers.45.post_attn_norm.scale": "pytorch_model-00041.bin", "model.layers.45.pre_moe_norm.scale": "pytorch_model-00041.bin", "model.layers.45.post_moe_norm.scale": "pytorch_model-00041.bin", "model.layers.45.moe_block.gate.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.0.linear.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.1.linear.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.2.linear.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.3.linear.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.4.linear.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.5.linear.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.6.linear.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.7.linear.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.0.linear_1.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.1.linear_1.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.2.linear_1.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.3.linear_1.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.4.linear_1.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.5.linear_1.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.6.linear_1.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.7.linear_1.weight": "pytorch_model-00041.bin", "model.layers.46.moe_block.experts.0.linear_v.weight": "pytorch_model-00042.bin", "model.layers.46.moe_block.experts.1.linear_v.weight": "pytorch_model-00042.bin", "model.layers.46.moe_block.experts.2.linear_v.weight": "pytorch_model-00042.bin", "model.layers.46.moe_block.experts.3.linear_v.weight": "pytorch_model-00042.bin", "model.layers.46.moe_block.experts.4.linear_v.weight": "pytorch_model-00042.bin", "model.layers.46.moe_block.experts.5.linear_v.weight": "pytorch_model-00042.bin", "model.layers.46.moe_block.experts.6.linear_v.weight": "pytorch_model-00042.bin", "model.layers.46.moe_block.experts.7.linear_v.weight": "pytorch_model-00042.bin", "model.layers.46.attn.k_proj.weight": "pytorch_model-00042.bin", "model.layers.46.attn.o_proj.weight": "pytorch_model-00042.bin", "model.layers.46.attn.q_proj.weight": "pytorch_model-00042.bin", "model.layers.46.attn.v_proj.weight": "pytorch_model-00042.bin", "model.layers.46.pre_attn_norm.scale": "pytorch_model-00042.bin", "model.layers.46.post_attn_norm.scale": "pytorch_model-00042.bin", "model.layers.46.pre_moe_norm.scale": "pytorch_model-00042.bin", "model.layers.46.post_moe_norm.scale": "pytorch_model-00042.bin", "model.layers.46.moe_block.gate.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.0.linear.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.1.linear.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.2.linear.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.3.linear.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.4.linear.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.5.linear.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.6.linear.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.7.linear.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.0.linear_1.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.1.linear_1.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.2.linear_1.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.3.linear_1.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.4.linear_1.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.5.linear_1.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.6.linear_1.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.7.linear_1.weight": "pytorch_model-00042.bin", "model.layers.47.moe_block.experts.0.linear_v.weight": "pytorch_model-00043.bin", "model.layers.47.moe_block.experts.1.linear_v.weight": "pytorch_model-00043.bin", "model.layers.47.moe_block.experts.2.linear_v.weight": "pytorch_model-00043.bin", "model.layers.47.moe_block.experts.3.linear_v.weight": "pytorch_model-00043.bin", "model.layers.47.moe_block.experts.4.linear_v.weight": "pytorch_model-00043.bin", "model.layers.47.moe_block.experts.5.linear_v.weight": "pytorch_model-00043.bin", "model.layers.47.moe_block.experts.6.linear_v.weight": "pytorch_model-00043.bin", "model.layers.47.moe_block.experts.7.linear_v.weight": "pytorch_model-00043.bin", "model.layers.47.attn.k_proj.weight": "pytorch_model-00043.bin", "model.layers.47.attn.o_proj.weight": "pytorch_model-00043.bin", "model.layers.47.attn.q_proj.weight": "pytorch_model-00043.bin", "model.layers.47.attn.v_proj.weight": "pytorch_model-00043.bin", "model.layers.47.pre_attn_norm.scale": "pytorch_model-00043.bin", "model.layers.47.post_attn_norm.scale": "pytorch_model-00043.bin", "model.layers.47.pre_moe_norm.scale": "pytorch_model-00043.bin", "model.layers.47.post_moe_norm.scale": "pytorch_model-00043.bin", "model.layers.47.moe_block.gate.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.0.linear.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.1.linear.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.2.linear.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.3.linear.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.4.linear.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.5.linear.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.6.linear.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.7.linear.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.0.linear_1.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.1.linear_1.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.2.linear_1.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.3.linear_1.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.4.linear_1.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.5.linear_1.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.6.linear_1.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.7.linear_1.weight": "pytorch_model-00043.bin", "model.layers.48.moe_block.experts.0.linear_v.weight": "pytorch_model-00044.bin", "model.layers.48.moe_block.experts.1.linear_v.weight": "pytorch_model-00044.bin", "model.layers.48.moe_block.experts.2.linear_v.weight": "pytorch_model-00044.bin", "model.layers.48.moe_block.experts.3.linear_v.weight": "pytorch_model-00044.bin", "model.layers.48.moe_block.experts.4.linear_v.weight": "pytorch_model-00044.bin", "model.layers.48.moe_block.experts.5.linear_v.weight": "pytorch_model-00044.bin", "model.layers.48.moe_block.experts.6.linear_v.weight": "pytorch_model-00044.bin", "model.layers.48.moe_block.experts.7.linear_v.weight": "pytorch_model-00044.bin", "model.layers.48.attn.k_proj.weight": "pytorch_model-00044.bin", "model.layers.48.attn.o_proj.weight": "pytorch_model-00044.bin", "model.layers.48.attn.q_proj.weight": "pytorch_model-00044.bin", "model.layers.48.attn.v_proj.weight": "pytorch_model-00044.bin", "model.layers.48.pre_attn_norm.scale": "pytorch_model-00044.bin", "model.layers.48.post_attn_norm.scale": "pytorch_model-00044.bin", "model.layers.48.pre_moe_norm.scale": "pytorch_model-00044.bin", "model.layers.48.post_moe_norm.scale": "pytorch_model-00044.bin", "model.layers.48.moe_block.gate.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.0.linear.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.1.linear.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.2.linear.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.3.linear.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.4.linear.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.5.linear.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.6.linear.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.7.linear.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.0.linear_1.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.1.linear_1.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.2.linear_1.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.3.linear_1.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.4.linear_1.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.5.linear_1.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.6.linear_1.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.7.linear_1.weight": "pytorch_model-00044.bin", "model.layers.49.moe_block.experts.0.linear_v.weight": "pytorch_model-00045.bin", "model.layers.49.moe_block.experts.1.linear_v.weight": "pytorch_model-00045.bin", "model.layers.49.moe_block.experts.2.linear_v.weight": "pytorch_model-00045.bin", "model.layers.49.moe_block.experts.3.linear_v.weight": "pytorch_model-00045.bin", "model.layers.49.moe_block.experts.4.linear_v.weight": "pytorch_model-00045.bin", "model.layers.49.moe_block.experts.5.linear_v.weight": "pytorch_model-00045.bin", "model.layers.49.moe_block.experts.6.linear_v.weight": "pytorch_model-00045.bin", "model.layers.49.moe_block.experts.7.linear_v.weight": "pytorch_model-00045.bin", "model.layers.49.attn.k_proj.weight": "pytorch_model-00045.bin", "model.layers.49.attn.o_proj.weight": "pytorch_model-00045.bin", "model.layers.49.attn.q_proj.weight": "pytorch_model-00045.bin", "model.layers.49.attn.v_proj.weight": "pytorch_model-00045.bin", "model.layers.49.pre_attn_norm.scale": "pytorch_model-00045.bin", "model.layers.49.post_attn_norm.scale": "pytorch_model-00045.bin", "model.layers.49.pre_moe_norm.scale": "pytorch_model-00045.bin", "model.layers.49.post_moe_norm.scale": "pytorch_model-00045.bin", "model.layers.49.moe_block.gate.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.0.linear.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.1.linear.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.2.linear.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.3.linear.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.4.linear.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.5.linear.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.6.linear.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.7.linear.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.0.linear_1.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.1.linear_1.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.2.linear_1.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.3.linear_1.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.4.linear_1.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.5.linear_1.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.6.linear_1.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.7.linear_1.weight": "pytorch_model-00045.bin", "model.layers.5.moe_block.experts.0.linear_v.weight": "pytorch_model-00046.bin", "model.layers.5.moe_block.experts.1.linear_v.weight": "pytorch_model-00046.bin", "model.layers.5.moe_block.experts.2.linear_v.weight": "pytorch_model-00046.bin", "model.layers.5.moe_block.experts.3.linear_v.weight": "pytorch_model-00046.bin", "model.layers.5.moe_block.experts.4.linear_v.weight": "pytorch_model-00046.bin", "model.layers.5.moe_block.experts.5.linear_v.weight": "pytorch_model-00046.bin", "model.layers.5.moe_block.experts.6.linear_v.weight": "pytorch_model-00046.bin", "model.layers.5.moe_block.experts.7.linear_v.weight": "pytorch_model-00046.bin", "model.layers.5.attn.k_proj.weight": "pytorch_model-00046.bin", "model.layers.5.attn.o_proj.weight": "pytorch_model-00046.bin", "model.layers.5.attn.q_proj.weight": "pytorch_model-00046.bin", "model.layers.5.attn.v_proj.weight": "pytorch_model-00046.bin", "model.layers.5.pre_attn_norm.scale": "pytorch_model-00046.bin", "model.layers.5.post_attn_norm.scale": "pytorch_model-00046.bin", "model.layers.5.pre_moe_norm.scale": "pytorch_model-00046.bin", "model.layers.5.post_moe_norm.scale": "pytorch_model-00046.bin", "model.layers.5.moe_block.gate.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.0.linear.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.1.linear.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.2.linear.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.3.linear.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.4.linear.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.5.linear.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.6.linear.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.7.linear.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.0.linear_1.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.1.linear_1.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.2.linear_1.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.3.linear_1.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.4.linear_1.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.5.linear_1.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.6.linear_1.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.7.linear_1.weight": "pytorch_model-00046.bin", "model.layers.50.moe_block.experts.0.linear_v.weight": "pytorch_model-00047.bin", "model.layers.50.moe_block.experts.1.linear_v.weight": "pytorch_model-00047.bin", "model.layers.50.moe_block.experts.2.linear_v.weight": "pytorch_model-00047.bin", "model.layers.50.moe_block.experts.3.linear_v.weight": "pytorch_model-00047.bin", "model.layers.50.moe_block.experts.4.linear_v.weight": "pytorch_model-00047.bin", "model.layers.50.moe_block.experts.5.linear_v.weight": "pytorch_model-00047.bin", "model.layers.50.moe_block.experts.6.linear_v.weight": "pytorch_model-00047.bin", "model.layers.50.moe_block.experts.7.linear_v.weight": "pytorch_model-00047.bin", "model.layers.50.attn.k_proj.weight": "pytorch_model-00047.bin", "model.layers.50.attn.o_proj.weight": "pytorch_model-00047.bin", "model.layers.50.attn.q_proj.weight": "pytorch_model-00047.bin", "model.layers.50.attn.v_proj.weight": "pytorch_model-00047.bin", "model.layers.50.pre_attn_norm.scale": "pytorch_model-00047.bin", "model.layers.50.post_attn_norm.scale": "pytorch_model-00047.bin", "model.layers.50.pre_moe_norm.scale": "pytorch_model-00047.bin", "model.layers.50.post_moe_norm.scale": "pytorch_model-00047.bin", "model.layers.50.moe_block.gate.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.0.linear.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.1.linear.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.2.linear.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.3.linear.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.4.linear.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.5.linear.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.6.linear.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.7.linear.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.0.linear_1.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.1.linear_1.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.2.linear_1.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.3.linear_1.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.4.linear_1.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.5.linear_1.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.6.linear_1.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.7.linear_1.weight": "pytorch_model-00047.bin", "model.layers.51.moe_block.experts.0.linear_v.weight": "pytorch_model-00048.bin", "model.layers.51.moe_block.experts.1.linear_v.weight": "pytorch_model-00048.bin", "model.layers.51.moe_block.experts.2.linear_v.weight": "pytorch_model-00048.bin", "model.layers.51.moe_block.experts.3.linear_v.weight": "pytorch_model-00048.bin", "model.layers.51.moe_block.experts.4.linear_v.weight": "pytorch_model-00048.bin", "model.layers.51.moe_block.experts.5.linear_v.weight": "pytorch_model-00048.bin", "model.layers.51.moe_block.experts.6.linear_v.weight": "pytorch_model-00048.bin", "model.layers.51.moe_block.experts.7.linear_v.weight": "pytorch_model-00048.bin", "model.layers.51.attn.k_proj.weight": "pytorch_model-00048.bin", "model.layers.51.attn.o_proj.weight": "pytorch_model-00048.bin", "model.layers.51.attn.q_proj.weight": "pytorch_model-00048.bin", "model.layers.51.attn.v_proj.weight": "pytorch_model-00048.bin", "model.layers.51.pre_attn_norm.scale": "pytorch_model-00048.bin", "model.layers.51.post_attn_norm.scale": "pytorch_model-00048.bin", "model.layers.51.pre_moe_norm.scale": "pytorch_model-00048.bin", "model.layers.51.post_moe_norm.scale": "pytorch_model-00048.bin", "model.layers.51.moe_block.gate.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.0.linear.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.1.linear.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.2.linear.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.3.linear.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.4.linear.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.5.linear.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.6.linear.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.7.linear.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.0.linear_1.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.1.linear_1.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.2.linear_1.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.3.linear_1.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.4.linear_1.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.5.linear_1.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.6.linear_1.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.7.linear_1.weight": "pytorch_model-00048.bin", "model.layers.52.moe_block.experts.0.linear_v.weight": "pytorch_model-00049.bin", "model.layers.52.moe_block.experts.1.linear_v.weight": "pytorch_model-00049.bin", "model.layers.52.moe_block.experts.2.linear_v.weight": "pytorch_model-00049.bin", "model.layers.52.moe_block.experts.3.linear_v.weight": "pytorch_model-00049.bin", "model.layers.52.moe_block.experts.4.linear_v.weight": "pytorch_model-00049.bin", "model.layers.52.moe_block.experts.5.linear_v.weight": "pytorch_model-00049.bin", "model.layers.52.moe_block.experts.6.linear_v.weight": "pytorch_model-00049.bin", "model.layers.52.moe_block.experts.7.linear_v.weight": "pytorch_model-00049.bin", "model.layers.52.attn.k_proj.weight": "pytorch_model-00049.bin", "model.layers.52.attn.o_proj.weight": "pytorch_model-00049.bin", "model.layers.52.attn.q_proj.weight": "pytorch_model-00049.bin", "model.layers.52.attn.v_proj.weight": "pytorch_model-00049.bin", "model.layers.52.pre_attn_norm.scale": "pytorch_model-00049.bin", "model.layers.52.post_attn_norm.scale": "pytorch_model-00049.bin", "model.layers.52.pre_moe_norm.scale": "pytorch_model-00049.bin", "model.layers.52.post_moe_norm.scale": "pytorch_model-00049.bin", "model.layers.52.moe_block.gate.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.0.linear.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.1.linear.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.2.linear.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.3.linear.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.4.linear.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.5.linear.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.6.linear.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.7.linear.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.0.linear_1.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.1.linear_1.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.2.linear_1.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.3.linear_1.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.4.linear_1.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.5.linear_1.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.6.linear_1.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.7.linear_1.weight": "pytorch_model-00049.bin", "model.layers.53.moe_block.experts.0.linear_v.weight": "pytorch_model-00050.bin", "model.layers.53.moe_block.experts.1.linear_v.weight": "pytorch_model-00050.bin", "model.layers.53.moe_block.experts.2.linear_v.weight": "pytorch_model-00050.bin", "model.layers.53.moe_block.experts.3.linear_v.weight": "pytorch_model-00050.bin", "model.layers.53.moe_block.experts.4.linear_v.weight": "pytorch_model-00050.bin", "model.layers.53.moe_block.experts.5.linear_v.weight": "pytorch_model-00050.bin", "model.layers.53.moe_block.experts.6.linear_v.weight": "pytorch_model-00050.bin", "model.layers.53.moe_block.experts.7.linear_v.weight": "pytorch_model-00050.bin", "model.layers.53.attn.k_proj.weight": "pytorch_model-00050.bin", "model.layers.53.attn.o_proj.weight": "pytorch_model-00050.bin", "model.layers.53.attn.q_proj.weight": "pytorch_model-00050.bin", "model.layers.53.attn.v_proj.weight": "pytorch_model-00050.bin", "model.layers.53.pre_attn_norm.scale": "pytorch_model-00050.bin", "model.layers.53.post_attn_norm.scale": "pytorch_model-00050.bin", "model.layers.53.pre_moe_norm.scale": "pytorch_model-00050.bin", "model.layers.53.post_moe_norm.scale": "pytorch_model-00050.bin", "model.layers.53.moe_block.gate.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.0.linear.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.1.linear.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.2.linear.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.3.linear.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.4.linear.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.5.linear.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.6.linear.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.7.linear.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.0.linear_1.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.1.linear_1.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.2.linear_1.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.3.linear_1.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.4.linear_1.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.5.linear_1.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.6.linear_1.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.7.linear_1.weight": "pytorch_model-00050.bin", "model.layers.54.moe_block.experts.0.linear_v.weight": "pytorch_model-00051.bin", "model.layers.54.moe_block.experts.1.linear_v.weight": "pytorch_model-00051.bin", "model.layers.54.moe_block.experts.2.linear_v.weight": "pytorch_model-00051.bin", "model.layers.54.moe_block.experts.3.linear_v.weight": "pytorch_model-00051.bin", "model.layers.54.moe_block.experts.4.linear_v.weight": "pytorch_model-00051.bin", "model.layers.54.moe_block.experts.5.linear_v.weight": "pytorch_model-00051.bin", "model.layers.54.moe_block.experts.6.linear_v.weight": "pytorch_model-00051.bin", "model.layers.54.moe_block.experts.7.linear_v.weight": "pytorch_model-00051.bin", "model.layers.54.attn.k_proj.weight": "pytorch_model-00051.bin", "model.layers.54.attn.o_proj.weight": "pytorch_model-00051.bin", "model.layers.54.attn.q_proj.weight": "pytorch_model-00051.bin", "model.layers.54.attn.v_proj.weight": "pytorch_model-00051.bin", "model.layers.54.pre_attn_norm.scale": "pytorch_model-00051.bin", "model.layers.54.post_attn_norm.scale": "pytorch_model-00051.bin", "model.layers.54.pre_moe_norm.scale": "pytorch_model-00051.bin", "model.layers.54.post_moe_norm.scale": "pytorch_model-00051.bin", "model.layers.54.moe_block.gate.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.0.linear.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.1.linear.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.2.linear.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.3.linear.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.4.linear.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.5.linear.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.6.linear.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.7.linear.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.0.linear_1.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.1.linear_1.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.2.linear_1.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.3.linear_1.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.4.linear_1.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.5.linear_1.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.6.linear_1.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.7.linear_1.weight": "pytorch_model-00051.bin", "model.layers.55.moe_block.experts.0.linear_v.weight": "pytorch_model-00052.bin", "model.layers.55.moe_block.experts.1.linear_v.weight": "pytorch_model-00052.bin", "model.layers.55.moe_block.experts.2.linear_v.weight": "pytorch_model-00052.bin", "model.layers.55.moe_block.experts.3.linear_v.weight": "pytorch_model-00052.bin", "model.layers.55.moe_block.experts.4.linear_v.weight": "pytorch_model-00052.bin", "model.layers.55.moe_block.experts.5.linear_v.weight": "pytorch_model-00052.bin", "model.layers.55.moe_block.experts.6.linear_v.weight": "pytorch_model-00052.bin", "model.layers.55.moe_block.experts.7.linear_v.weight": "pytorch_model-00052.bin", "model.layers.55.attn.k_proj.weight": "pytorch_model-00052.bin", "model.layers.55.attn.o_proj.weight": "pytorch_model-00052.bin", "model.layers.55.attn.q_proj.weight": "pytorch_model-00052.bin", "model.layers.55.attn.v_proj.weight": "pytorch_model-00052.bin", "model.layers.55.pre_attn_norm.scale": "pytorch_model-00052.bin", "model.layers.55.post_attn_norm.scale": "pytorch_model-00052.bin", "model.layers.55.pre_moe_norm.scale": "pytorch_model-00052.bin", "model.layers.55.post_moe_norm.scale": "pytorch_model-00052.bin", "model.layers.55.moe_block.gate.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.0.linear.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.1.linear.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.2.linear.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.3.linear.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.4.linear.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.5.linear.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.6.linear.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.7.linear.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.0.linear_1.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.1.linear_1.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.2.linear_1.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.3.linear_1.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.4.linear_1.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.5.linear_1.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.6.linear_1.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.7.linear_1.weight": "pytorch_model-00052.bin", "model.layers.56.moe_block.experts.0.linear_v.weight": "pytorch_model-00053.bin", "model.layers.56.moe_block.experts.1.linear_v.weight": "pytorch_model-00053.bin", "model.layers.56.moe_block.experts.2.linear_v.weight": "pytorch_model-00053.bin", "model.layers.56.moe_block.experts.3.linear_v.weight": "pytorch_model-00053.bin", "model.layers.56.moe_block.experts.4.linear_v.weight": "pytorch_model-00053.bin", "model.layers.56.moe_block.experts.5.linear_v.weight": "pytorch_model-00053.bin", "model.layers.56.moe_block.experts.6.linear_v.weight": "pytorch_model-00053.bin", "model.layers.56.moe_block.experts.7.linear_v.weight": "pytorch_model-00053.bin", "model.layers.56.attn.k_proj.weight": "pytorch_model-00053.bin", "model.layers.56.attn.o_proj.weight": "pytorch_model-00053.bin", "model.layers.56.attn.q_proj.weight": "pytorch_model-00053.bin", "model.layers.56.attn.v_proj.weight": "pytorch_model-00053.bin", "model.layers.56.pre_attn_norm.scale": "pytorch_model-00053.bin", "model.layers.56.post_attn_norm.scale": "pytorch_model-00053.bin", "model.layers.56.pre_moe_norm.scale": "pytorch_model-00053.bin", "model.layers.56.post_moe_norm.scale": "pytorch_model-00053.bin", "model.layers.56.moe_block.gate.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.0.linear.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.1.linear.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.2.linear.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.3.linear.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.4.linear.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.5.linear.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.6.linear.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.7.linear.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.0.linear_1.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.1.linear_1.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.2.linear_1.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.3.linear_1.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.4.linear_1.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.5.linear_1.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.6.linear_1.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.7.linear_1.weight": "pytorch_model-00053.bin", "model.layers.57.moe_block.experts.0.linear_v.weight": "pytorch_model-00054.bin", "model.layers.57.moe_block.experts.1.linear_v.weight": "pytorch_model-00054.bin", "model.layers.57.moe_block.experts.2.linear_v.weight": "pytorch_model-00054.bin", "model.layers.57.moe_block.experts.3.linear_v.weight": "pytorch_model-00054.bin", "model.layers.57.moe_block.experts.4.linear_v.weight": "pytorch_model-00054.bin", "model.layers.57.moe_block.experts.5.linear_v.weight": "pytorch_model-00054.bin", "model.layers.57.moe_block.experts.6.linear_v.weight": "pytorch_model-00054.bin", "model.layers.57.moe_block.experts.7.linear_v.weight": "pytorch_model-00054.bin", "model.layers.57.attn.k_proj.weight": "pytorch_model-00054.bin", "model.layers.57.attn.o_proj.weight": "pytorch_model-00054.bin", "model.layers.57.attn.q_proj.weight": "pytorch_model-00054.bin", "model.layers.57.attn.v_proj.weight": "pytorch_model-00054.bin", "model.layers.57.pre_attn_norm.scale": "pytorch_model-00054.bin", "model.layers.57.post_attn_norm.scale": "pytorch_model-00054.bin", "model.layers.57.pre_moe_norm.scale": "pytorch_model-00054.bin", "model.layers.57.post_moe_norm.scale": "pytorch_model-00054.bin", "model.layers.57.moe_block.gate.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.0.linear.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.1.linear.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.2.linear.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.3.linear.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.4.linear.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.5.linear.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.6.linear.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.7.linear.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.0.linear_1.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.1.linear_1.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.2.linear_1.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.3.linear_1.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.4.linear_1.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.5.linear_1.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.6.linear_1.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.7.linear_1.weight": "pytorch_model-00054.bin", "model.layers.58.moe_block.experts.0.linear_v.weight": "pytorch_model-00055.bin", "model.layers.58.moe_block.experts.1.linear_v.weight": "pytorch_model-00055.bin", "model.layers.58.moe_block.experts.2.linear_v.weight": "pytorch_model-00055.bin", "model.layers.58.moe_block.experts.3.linear_v.weight": "pytorch_model-00055.bin", "model.layers.58.moe_block.experts.4.linear_v.weight": "pytorch_model-00055.bin", "model.layers.58.moe_block.experts.5.linear_v.weight": "pytorch_model-00055.bin", "model.layers.58.moe_block.experts.6.linear_v.weight": "pytorch_model-00055.bin", "model.layers.58.moe_block.experts.7.linear_v.weight": "pytorch_model-00055.bin", "model.layers.58.attn.k_proj.weight": "pytorch_model-00055.bin", "model.layers.58.attn.o_proj.weight": "pytorch_model-00055.bin", "model.layers.58.attn.q_proj.weight": "pytorch_model-00055.bin", "model.layers.58.attn.v_proj.weight": "pytorch_model-00055.bin", "model.layers.58.pre_attn_norm.scale": "pytorch_model-00055.bin", "model.layers.58.post_attn_norm.scale": "pytorch_model-00055.bin", "model.layers.58.pre_moe_norm.scale": "pytorch_model-00055.bin", "model.layers.58.post_moe_norm.scale": "pytorch_model-00055.bin", "model.layers.58.moe_block.gate.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.0.linear.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.1.linear.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.2.linear.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.3.linear.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.4.linear.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.5.linear.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.6.linear.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.7.linear.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.0.linear_1.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.1.linear_1.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.2.linear_1.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.3.linear_1.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.4.linear_1.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.5.linear_1.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.6.linear_1.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.7.linear_1.weight": "pytorch_model-00055.bin", "model.layers.59.moe_block.experts.0.linear_v.weight": "pytorch_model-00056.bin", "model.layers.59.moe_block.experts.1.linear_v.weight": "pytorch_model-00056.bin", "model.layers.59.moe_block.experts.2.linear_v.weight": "pytorch_model-00056.bin", "model.layers.59.moe_block.experts.3.linear_v.weight": "pytorch_model-00056.bin", "model.layers.59.moe_block.experts.4.linear_v.weight": "pytorch_model-00056.bin", "model.layers.59.moe_block.experts.5.linear_v.weight": "pytorch_model-00056.bin", "model.layers.59.moe_block.experts.6.linear_v.weight": "pytorch_model-00056.bin", "model.layers.59.moe_block.experts.7.linear_v.weight": "pytorch_model-00056.bin", "model.layers.59.attn.k_proj.weight": "pytorch_model-00056.bin", "model.layers.59.attn.o_proj.weight": "pytorch_model-00056.bin", "model.layers.59.attn.q_proj.weight": "pytorch_model-00056.bin", "model.layers.59.attn.v_proj.weight": "pytorch_model-00056.bin", "model.layers.59.pre_attn_norm.scale": "pytorch_model-00056.bin", "model.layers.59.post_attn_norm.scale": "pytorch_model-00056.bin", "model.layers.59.pre_moe_norm.scale": "pytorch_model-00056.bin", "model.layers.59.post_moe_norm.scale": "pytorch_model-00056.bin", "model.layers.59.moe_block.gate.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.0.linear.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.1.linear.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.2.linear.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.3.linear.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.4.linear.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.5.linear.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.6.linear.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.7.linear.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.0.linear_1.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.1.linear_1.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.2.linear_1.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.3.linear_1.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.4.linear_1.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.5.linear_1.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.6.linear_1.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.7.linear_1.weight": "pytorch_model-00056.bin", "model.layers.6.moe_block.experts.0.linear_v.weight": "pytorch_model-00057.bin", "model.layers.6.moe_block.experts.1.linear_v.weight": "pytorch_model-00057.bin", "model.layers.6.moe_block.experts.2.linear_v.weight": "pytorch_model-00057.bin", "model.layers.6.moe_block.experts.3.linear_v.weight": "pytorch_model-00057.bin", "model.layers.6.moe_block.experts.4.linear_v.weight": "pytorch_model-00057.bin", "model.layers.6.moe_block.experts.5.linear_v.weight": "pytorch_model-00057.bin", "model.layers.6.moe_block.experts.6.linear_v.weight": "pytorch_model-00057.bin", "model.layers.6.moe_block.experts.7.linear_v.weight": "pytorch_model-00057.bin", "model.layers.6.attn.k_proj.weight": "pytorch_model-00057.bin", "model.layers.6.attn.o_proj.weight": "pytorch_model-00057.bin", "model.layers.6.attn.q_proj.weight": "pytorch_model-00057.bin", "model.layers.6.attn.v_proj.weight": "pytorch_model-00057.bin", "model.layers.6.pre_attn_norm.scale": "pytorch_model-00057.bin", "model.layers.6.post_attn_norm.scale": "pytorch_model-00057.bin", "model.layers.6.pre_moe_norm.scale": "pytorch_model-00057.bin", "model.layers.6.post_moe_norm.scale": "pytorch_model-00057.bin", "model.layers.6.moe_block.gate.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.0.linear.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.1.linear.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.2.linear.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.3.linear.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.4.linear.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.5.linear.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.6.linear.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.7.linear.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.0.linear_1.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.1.linear_1.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.2.linear_1.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.3.linear_1.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.4.linear_1.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.5.linear_1.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.6.linear_1.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.7.linear_1.weight": "pytorch_model-00057.bin", "model.layers.60.moe_block.experts.0.linear_v.weight": "pytorch_model-00058.bin", "model.layers.60.moe_block.experts.1.linear_v.weight": "pytorch_model-00058.bin", "model.layers.60.moe_block.experts.2.linear_v.weight": "pytorch_model-00058.bin", "model.layers.60.moe_block.experts.3.linear_v.weight": "pytorch_model-00058.bin", "model.layers.60.moe_block.experts.4.linear_v.weight": "pytorch_model-00058.bin", "model.layers.60.moe_block.experts.5.linear_v.weight": "pytorch_model-00058.bin", "model.layers.60.moe_block.experts.6.linear_v.weight": "pytorch_model-00058.bin", "model.layers.60.moe_block.experts.7.linear_v.weight": "pytorch_model-00058.bin", "model.layers.60.attn.k_proj.weight": "pytorch_model-00058.bin", "model.layers.60.attn.o_proj.weight": "pytorch_model-00058.bin", "model.layers.60.attn.q_proj.weight": "pytorch_model-00058.bin", "model.layers.60.attn.v_proj.weight": "pytorch_model-00058.bin", "model.layers.60.pre_attn_norm.scale": "pytorch_model-00058.bin", "model.layers.60.post_attn_norm.scale": "pytorch_model-00058.bin", "model.layers.60.pre_moe_norm.scale": "pytorch_model-00058.bin", "model.layers.60.post_moe_norm.scale": "pytorch_model-00058.bin", "model.layers.60.moe_block.gate.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.0.linear.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.1.linear.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.2.linear.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.3.linear.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.4.linear.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.5.linear.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.6.linear.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.7.linear.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.0.linear_1.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.1.linear_1.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.2.linear_1.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.3.linear_1.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.4.linear_1.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.5.linear_1.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.6.linear_1.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.7.linear_1.weight": "pytorch_model-00058.bin", "model.layers.61.moe_block.experts.0.linear_v.weight": "pytorch_model-00059.bin", "model.layers.61.moe_block.experts.1.linear_v.weight": "pytorch_model-00059.bin", "model.layers.61.moe_block.experts.2.linear_v.weight": "pytorch_model-00059.bin", "model.layers.61.moe_block.experts.3.linear_v.weight": "pytorch_model-00059.bin", "model.layers.61.moe_block.experts.4.linear_v.weight": "pytorch_model-00059.bin", "model.layers.61.moe_block.experts.5.linear_v.weight": "pytorch_model-00059.bin", "model.layers.61.moe_block.experts.6.linear_v.weight": "pytorch_model-00059.bin", "model.layers.61.moe_block.experts.7.linear_v.weight": "pytorch_model-00059.bin", "model.layers.61.attn.k_proj.weight": "pytorch_model-00059.bin", "model.layers.61.attn.o_proj.weight": "pytorch_model-00059.bin", "model.layers.61.attn.q_proj.weight": "pytorch_model-00059.bin", "model.layers.61.attn.v_proj.weight": "pytorch_model-00059.bin", "model.layers.61.pre_attn_norm.scale": "pytorch_model-00059.bin", "model.layers.61.post_attn_norm.scale": "pytorch_model-00059.bin", "model.layers.61.pre_moe_norm.scale": "pytorch_model-00059.bin", "model.layers.61.post_moe_norm.scale": "pytorch_model-00059.bin", "model.layers.61.moe_block.gate.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.0.linear.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.1.linear.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.2.linear.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.3.linear.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.4.linear.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.5.linear.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.6.linear.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.7.linear.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.0.linear_1.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.1.linear_1.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.2.linear_1.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.3.linear_1.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.4.linear_1.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.5.linear_1.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.6.linear_1.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.7.linear_1.weight": "pytorch_model-00059.bin", "model.layers.62.moe_block.experts.0.linear_v.weight": "pytorch_model-00060.bin", "model.layers.62.moe_block.experts.1.linear_v.weight": "pytorch_model-00060.bin", "model.layers.62.moe_block.experts.2.linear_v.weight": "pytorch_model-00060.bin", "model.layers.62.moe_block.experts.3.linear_v.weight": "pytorch_model-00060.bin", "model.layers.62.moe_block.experts.4.linear_v.weight": "pytorch_model-00060.bin", "model.layers.62.moe_block.experts.5.linear_v.weight": "pytorch_model-00060.bin", "model.layers.62.moe_block.experts.6.linear_v.weight": "pytorch_model-00060.bin", "model.layers.62.moe_block.experts.7.linear_v.weight": "pytorch_model-00060.bin", "model.layers.62.attn.k_proj.weight": "pytorch_model-00060.bin", "model.layers.62.attn.o_proj.weight": "pytorch_model-00060.bin", "model.layers.62.attn.q_proj.weight": "pytorch_model-00060.bin", "model.layers.62.attn.v_proj.weight": "pytorch_model-00060.bin", "model.layers.62.pre_attn_norm.scale": "pytorch_model-00060.bin", "model.layers.62.post_attn_norm.scale": "pytorch_model-00060.bin", "model.layers.62.pre_moe_norm.scale": "pytorch_model-00060.bin", "model.layers.62.post_moe_norm.scale": "pytorch_model-00060.bin", "model.layers.62.moe_block.gate.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.0.linear.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.1.linear.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.2.linear.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.3.linear.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.4.linear.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.5.linear.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.6.linear.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.7.linear.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.0.linear_1.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.1.linear_1.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.2.linear_1.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.3.linear_1.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.4.linear_1.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.5.linear_1.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.6.linear_1.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.7.linear_1.weight": "pytorch_model-00060.bin", "model.layers.63.moe_block.experts.0.linear_v.weight": "pytorch_model-00061.bin", "model.layers.63.moe_block.experts.1.linear_v.weight": "pytorch_model-00061.bin", "model.layers.63.moe_block.experts.2.linear_v.weight": "pytorch_model-00061.bin", "model.layers.63.moe_block.experts.3.linear_v.weight": "pytorch_model-00061.bin", "model.layers.63.moe_block.experts.4.linear_v.weight": "pytorch_model-00061.bin", "model.layers.63.moe_block.experts.5.linear_v.weight": "pytorch_model-00061.bin", "model.layers.63.moe_block.experts.6.linear_v.weight": "pytorch_model-00061.bin", "model.layers.63.moe_block.experts.7.linear_v.weight": "pytorch_model-00061.bin", "model.layers.63.attn.k_proj.weight": "pytorch_model-00061.bin", "model.layers.63.attn.o_proj.weight": "pytorch_model-00061.bin", "model.layers.63.attn.q_proj.weight": "pytorch_model-00061.bin", "model.layers.63.attn.v_proj.weight": "pytorch_model-00061.bin", "model.layers.63.pre_attn_norm.scale": "pytorch_model-00061.bin", "model.layers.63.post_attn_norm.scale": "pytorch_model-00061.bin", "model.layers.63.pre_moe_norm.scale": "pytorch_model-00061.bin", "model.layers.63.post_moe_norm.scale": "pytorch_model-00061.bin", "model.layers.63.moe_block.gate.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.0.linear.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.1.linear.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.2.linear.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.3.linear.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.4.linear.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.5.linear.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.6.linear.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.7.linear.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.0.linear_1.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.1.linear_1.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.2.linear_1.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.3.linear_1.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.4.linear_1.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.5.linear_1.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.6.linear_1.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.7.linear_1.weight": "pytorch_model-00061.bin", "model.layers.7.moe_block.experts.0.linear_v.weight": "pytorch_model-00062.bin", "model.layers.7.moe_block.experts.1.linear_v.weight": "pytorch_model-00062.bin", "model.layers.7.moe_block.experts.2.linear_v.weight": "pytorch_model-00062.bin", "model.layers.7.moe_block.experts.3.linear_v.weight": "pytorch_model-00062.bin", "model.layers.7.moe_block.experts.4.linear_v.weight": "pytorch_model-00062.bin", "model.layers.7.moe_block.experts.5.linear_v.weight": "pytorch_model-00062.bin", "model.layers.7.moe_block.experts.6.linear_v.weight": "pytorch_model-00062.bin", "model.layers.7.moe_block.experts.7.linear_v.weight": "pytorch_model-00062.bin", "model.layers.7.attn.k_proj.weight": "pytorch_model-00062.bin", "model.layers.7.attn.o_proj.weight": "pytorch_model-00062.bin", "model.layers.7.attn.q_proj.weight": "pytorch_model-00062.bin", "model.layers.7.attn.v_proj.weight": "pytorch_model-00062.bin", "model.layers.7.pre_attn_norm.scale": "pytorch_model-00062.bin", "model.layers.7.post_attn_norm.scale": "pytorch_model-00062.bin", "model.layers.7.pre_moe_norm.scale": "pytorch_model-00062.bin", "model.layers.7.post_moe_norm.scale": "pytorch_model-00062.bin", "model.layers.7.moe_block.gate.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.0.linear.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.1.linear.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.2.linear.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.3.linear.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.4.linear.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.5.linear.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.6.linear.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.7.linear.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.0.linear_1.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.1.linear_1.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.2.linear_1.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.3.linear_1.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.4.linear_1.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.5.linear_1.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.6.linear_1.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.7.linear_1.weight": "pytorch_model-00062.bin", "model.layers.8.moe_block.experts.0.linear_v.weight": "pytorch_model-00063.bin", "model.layers.8.moe_block.experts.1.linear_v.weight": "pytorch_model-00063.bin", "model.layers.8.moe_block.experts.2.linear_v.weight": "pytorch_model-00063.bin", "model.layers.8.moe_block.experts.3.linear_v.weight": "pytorch_model-00063.bin", "model.layers.8.moe_block.experts.4.linear_v.weight": "pytorch_model-00063.bin", "model.layers.8.moe_block.experts.5.linear_v.weight": "pytorch_model-00063.bin", "model.layers.8.moe_block.experts.6.linear_v.weight": "pytorch_model-00063.bin", "model.layers.8.moe_block.experts.7.linear_v.weight": "pytorch_model-00063.bin", "model.layers.8.attn.k_proj.weight": "pytorch_model-00063.bin", "model.layers.8.attn.o_proj.weight": "pytorch_model-00063.bin", "model.layers.8.attn.q_proj.weight": "pytorch_model-00063.bin", "model.layers.8.attn.v_proj.weight": "pytorch_model-00063.bin", "model.layers.8.pre_attn_norm.scale": "pytorch_model-00063.bin", "model.layers.8.post_attn_norm.scale": "pytorch_model-00063.bin", "model.layers.8.pre_moe_norm.scale": "pytorch_model-00063.bin", "model.layers.8.post_moe_norm.scale": "pytorch_model-00063.bin", "model.layers.8.moe_block.gate.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.0.linear.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.1.linear.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.2.linear.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.3.linear.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.4.linear.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.5.linear.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.6.linear.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.7.linear.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.0.linear_1.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.1.linear_1.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.2.linear_1.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.3.linear_1.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.4.linear_1.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.5.linear_1.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.6.linear_1.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.7.linear_1.weight": "pytorch_model-00063.bin", "model.layers.9.moe_block.experts.0.linear_v.weight": "pytorch_model-00064.bin", "model.layers.9.moe_block.experts.1.linear_v.weight": "pytorch_model-00064.bin", "model.layers.9.moe_block.experts.2.linear_v.weight": "pytorch_model-00064.bin", "model.layers.9.moe_block.experts.3.linear_v.weight": "pytorch_model-00064.bin", "model.layers.9.moe_block.experts.4.linear_v.weight": "pytorch_model-00064.bin", "model.layers.9.moe_block.experts.5.linear_v.weight": "pytorch_model-00064.bin", "model.layers.9.moe_block.experts.6.linear_v.weight": "pytorch_model-00064.bin", "model.layers.9.moe_block.experts.7.linear_v.weight": "pytorch_model-00064.bin", "model.layers.9.attn.k_proj.weight": "pytorch_model-00064.bin", "model.layers.9.attn.o_proj.weight": "pytorch_model-00064.bin", "model.layers.9.attn.q_proj.weight": "pytorch_model-00064.bin", "model.layers.9.attn.v_proj.weight": "pytorch_model-00064.bin", "model.layers.9.pre_attn_norm.scale": "pytorch_model-00064.bin", "model.layers.9.post_attn_norm.scale": "pytorch_model-00064.bin", "model.layers.9.pre_moe_norm.scale": "pytorch_model-00064.bin", "model.layers.9.post_moe_norm.scale": "pytorch_model-00064.bin", "model.layers.9.moe_block.gate.weight": "pytorch_model-00064.bin" } }