{ "metadata": { "ParamSize": 515, "ParamBytes": 10885079040.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "lm_head.weight", "shape": [ 32000, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "8f8cba748f2c1332c4444f4ec6320bdf" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.30.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "50baef0d64575e48a8e1a5163027c3d5" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.30.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "b1a171c416220779b3be554c5591a8ff" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.30.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "378fbf079dfde7b8e897ba14b3c16801" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.30.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "578855dea0b839e2eb23a337a0df4b60" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.30.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "c1c615fde8e4542efee278f5f66af5ca" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 32997376, "records": [ { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.30.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 8192 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19570688 }, { "name": "model.layers.30.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 19578880 } ], "md5sum": "3c3f92f3388aff87c4f90bbb217959cf" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.30.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "173360f69a0a8aeb76fa5d37228711c2" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.30.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "4ff327da422dfb6376eefd969b8f32aa" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.31.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "37b9f6cf9666ce759d555ceef71f4b90" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.31.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "ff3e691039226f33062138817ca7979e" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.31.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "b3bdd854e2b3317bd1cab0af875825b8" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.31.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "69156dd446b4cfaaffcf984fd90906b9" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.31.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "f1f3aedd095104e6d2c7ecac1e920e7d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.31.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "915e2ae1f9f71ef840e07a787fb342d7" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.30.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "51b387f9a5a8bf7f490120a1b552f602" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.31.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "f72096f46f787a9a0e50fcbafb66acf9" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.31.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "9a422914905f5e932d052f13695b09b3" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.31.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "f684715bec94f68a7d01297f354ab3b3" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32000, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "048fa1442a0eab8ebee267640eba9252" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.0.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "407544f729e24a7499fffb4d1754767d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.0.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "41086a76aaf1bcb068dfed84af2c4ad6" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.0.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "58cb1de577b25a0ec1f2d45593472461" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.0.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "694c9fd42e4542c990b639daeebce433" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.0.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "eb918ee85812683c03b3f615cb5beb2f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.0.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "73c796ec6f55ad96d9f7a4389b830974" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 26861568, "records": [ { "name": "model.layers.31.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26853376 } ], "md5sum": "31e63b721c77ff46c08df0e0407faae6" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.0.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "f6c92d1bb738c82d6400d95f09e63341" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.0.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "7ef53970474416aec521959d324d5af1" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.0.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "80d05b9a7ad94c4946cc95d967516bd3" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.1.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "9a8739443de195ea44142909dd23e911" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.1.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "cb8123af3e7269629451430acf7b9066" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.1.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "92ae7ec6b2b463a5e835ba6849456f88" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.1.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "7f4b18e705fa4fecc0fe3ad7b096e814" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.1.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "aa2c9be8cb7e86bfd0f3369211be9b38" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.1.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "ca297fe1d6bc3c86a3a56e5d46f85c3b" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.0.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "275b3ef856de539eb598c6aeddc76495" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.1.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "41e8c13ed513c6e0bc085a2fd63cb28b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.1.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "9b8b1c4234059d0fe63af6756493712b" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.1.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "cb616528d36b901f636f0c128401cba2" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.10.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "8774b36ae7a256d96ee25ec2017349ab" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.10.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "d7f5593cc9837a4d2dff4d104e2e42ad" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.10.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "b58f1ccc669d0a7bfbcc3b8bf0ec19e2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.10.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "b9e3ded25fc914288bc441d29ab5c5b9" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.10.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "d47d43e6c1be5b623dc3b51a4bda0e92" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.10.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "ffd9bf3c656613d5c622a984a3645c72" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.1.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "4dd6af7504f0768ad9e9bb4c2ddf80a2" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.10.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "f06f4dcda8b360e8eb0b8179d8b1b986" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.10.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "9545b68a306f4dbc9433188be57e999c" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.10.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "3b844e50c16729e01c0f54216a379437" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.11.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "dcbff671e6278f46114ceb734d42b5f3" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.11.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "962ceb827866c8ae703c707ae101fbb9" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.11.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "98b6bc7eb4bd8f1998f60dd7083feaaa" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.11.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "0dd4d34c1858075cf09c368544251d30" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.11.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "b62282b43954d0a2cf5ea9eda9bc7814" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.11.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "09498211de1e94216ec8ff7ab737418e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.10.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "a5043ee5c2be092e42999ae9cf0836e7" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.11.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "4deab194c4ad3da88238f823569bb855" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.11.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "b57ae1fa826991b0f6109f395da2260d" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.11.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "74a5711eda2654916bfd382812719a7f" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.12.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "9af620ba20463458dc4b50f15d7d19f2" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.12.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "b6f73e2522eaeec271a3b5882e3066bf" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.12.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "49460c9d0d50ac333e72b718cec3d22c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.12.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "53896d9db4d070ebec99d4a55c5d0178" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.12.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "b8c3d5c27740a29990eabc2bd32793f4" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.12.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "6adb9ca4de20e341d0759570cbd63129" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.11.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "7277a328b49f73925c71e27549b5bc1c" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.12.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "890ea1dca6d82594856ccff4e40f41b6" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.12.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "1eec6b4fff4a14062b6cfb5f3088b2cf" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.12.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "bcc0f027ee052d2e8fd5997dac6c0a55" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.13.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "7fbe7993e8325c42f31ec3eb33e3b185" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.13.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "a5b1a40512d85db20224b953980d8e36" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.13.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "1f95f058cca23d9cb4d914798cf202a3" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.13.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "c3b42e91fc34ac8a323876e5095f085b" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.13.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "f8e5b45dd775253554954a67b1579f54" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.13.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "f56e90f1642398280870f9e151af9e4a" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.12.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "8b4c55efb943976da9d0080098f5f707" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.13.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "6616b96ad13f83e84375b5f0202dadbd" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.13.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "0154c425401ef19a657c74c81acf06af" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.13.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "0321ccb28a46cd3dd8ccd4c7128ca2ad" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.14.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "77c9b908b24c1b3b7db651fc0548e0f6" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.14.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "cb6e19836a84436d574208c58d6a56df" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.14.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "0b68708f1462b2a20ddd394da5383c67" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.13.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "f19251d822f327c727c9da2577fcb6b0" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.14.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "edced914d70818937dfb8d38f689260d" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.14.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "c65802414003b30a76cac7c70cb6f96c" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.14.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "58c8e14de6b429ae86ebd33e550e6602" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.2.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "9edeb90cc4d6eabb16e316fca72f3e50" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.2.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "a964a5052b13aa929f873331539a5459" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.2.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "70758ee56a2f160cd02e6301c41faeaf" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.2.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "30a71e975a9c9409168c6bcae0b62c64" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.2.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "33acd07d3f5b6098ab09d801d6e49c6e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.2.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "8f86baa0db1d98960ed11a2e79769f22" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.14.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "521cb4652da631f1906dd14b921ca441" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.2.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "5b71720fbd37392b68ffb042f9e3cc2d" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.2.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "7b5a12f2215ec1d249de067a8cddb9cb" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.2.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "57f0a081d8a2686158478748c0d5a73a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.3.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "539d5a28bf9eb683f48461cd783af654" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.3.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "903d63d9b1d5ce48098713f1dd7b3633" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.3.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "af53403f25f620ece4ec814762bae752" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.3.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "961f0dc34087bba7459cfed27509378b" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.3.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "1eeb93c253a79d0e6078b5eb0197a625" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.3.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "28dae9395002f184c7bfa6b5d2649b4e" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.2.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "40abb2821ed79b6b1b38073909a8bfef" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.3.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "c2d5b070ad7ecd69946a9d339286d63b" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.3.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "ec2878202851d292ab8d6c911c6e2188" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.3.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "4b5f437b8f986df5a2c16f80541e6c72" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.4.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "cc5d84f0726b84d556d1c6e4ba75f6b3" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.4.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "886b99eecef43fde57a84209a0b5e505" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.4.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "e5a7df849905e18cba028eece36c2e40" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.4.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "1d4b492ada2d4310c91afe7377baec3d" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.4.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "8fb2f83edbde47cb3490e8876b5081e3" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.4.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "c362cac5a1fd15537e3bf1cfed08b9b6" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.3.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "f63ccb8328c4d135d970e42722f4b625" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.4.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "3b3c462926c86bbd8ff06c999f423d17" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.4.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "7ea197cb5c63f886aecb537047be3454" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.4.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "61b0b73ea4261c4f0018bb9bdd1dbb41" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.5.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "66dc455419cc4c391fec86dcbea52d0c" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.5.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "6ddf69817fc883ea04cb5c5e86f0268f" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.5.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "6c42183ec7898628a6e56580099637f6" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.5.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "3560da5fb54e1561d3a39f4cc7e894ea" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.5.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "689ea96b04a6e8422671ac65cdb76304" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.5.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "febf1468beb7e6a5758de0f48aec72e3" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.4.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "343f5d2eed7cbbcd6312594f5e95c94b" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.5.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "f6c214a4db7506c9dc37ef606a69bc42" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.5.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "2dde894e308a8f9c9c60b1cf7d2bde73" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.5.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "ffbe379afded41a230ccdfe03f9ceb9e" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.6.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "dd1bf5a8607015a715da774588306c18" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.6.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "ea1e4472cb9f274667422611a1636244" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.6.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "adc3c1b641ed749bda80a527a4b41460" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.6.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "bc8c2a0deaeb17f04c02f7c6d1985cac" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.6.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "9cceb1bb983fd3e1d1161674e98deca2" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.6.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "c63e0867127e5a90e4a485c520af0771" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.5.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "cb9964ea425e8d1f48ecb56493778480" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.6.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "7bf7cf009a451d89b688cbaeb7318b87" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.6.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "d57317386e05695e7704a4755d72de9f" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.6.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "c7a6d8bee22abd229896e045f7ebbe23" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.7.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "db68b657a7d82d323dd057f2de621452" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.7.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "074924b2e9e0b28bd1477c2f424496c0" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.7.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "b005393c6a2d5827e18a351ed4884897" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.7.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "c3fae0261b4f9d156fefa1586c974c75" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.7.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "6af8a7b5f51d7a43fc9f570a290231a3" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.7.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "0a8e4dfcddf1437d72e4bb7728c89738" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.6.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "7acb09c4465419e143724b5ba9a0afb4" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.7.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "78cc611dd833114e61c80c203117d217" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.7.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "a90d7a89fc652be0a44674406aefa43b" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.7.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "07cef2de1c8fa26e759e7258b333f7a8" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.8.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "5af8d15ef2b7df3c4f9fcff58d96a0cb" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.8.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "7f890766ea157dee02b6ebeee6c74115" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.8.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "d659290c8a101f83e32f21de4b3055e1" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.8.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "0ef87071b4df5d6522c352ee7e655b9a" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.8.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "78f0558c29b441f7fe7e863c04e0d4ff" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.8.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "be76d101c87c2d6fbac8d7bc5c972150" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.7.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "357d1a7439d2645b30987294d57219f3" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.8.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "517e40d42ba11f0a22a1026b8824c4a2" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.8.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "f291268458d4485302b9da95e2e64f43" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.8.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "07c30406279ce00f03f391615f52194c" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.9.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "9614f23f002dee7d030487e1c2499574" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.9.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "66ee97f1f367e0959be1f10a5dae20e4" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.9.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "a7c6ce7abc182d64f8ac7d06b5ba90b9" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.9.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "6fd783489d749db4b035f874c27ef51d" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.9.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "8c8ed52e2c81cd55d6b464af2383f716" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.9.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "a1aaa643deb1b4db64ec7bb171ca1dcc" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.8.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "c5f0f28fc51479973956404f99ff2ce0" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.9.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "9444b0c1222879dbd0b1236f392eabf2" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.9.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "de0edd1f0ae6bb1a412c0363b18af0ea" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.9.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "ff4ae995a4578372b66754bb0183be42" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.14.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "c5e92b48c06f02888325687f2bb0c40e" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.14.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "5cba202c751f6ee6dbf0af5795318a67" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.14.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "42ff3f248392ec45a4896fc6408ba840" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.15.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "9edd284591c8dce0697b4ec77f888088" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.15.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "0d24f66ad51dbea50142379d3ea8980a" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.15.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "7753237dac16c89c7b26198b3c78303b" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.15.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "4a6361dc4e71b284772eef2c28b2c234" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.15.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "d08140cda5d5dfc91e75c7d307b15367" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.15.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "57f8823a287848ae822594156a9c2504" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.9.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26853376 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26861568 } ], "md5sum": "629f0bdb713cfd1839f433973272017b" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.15.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "2927740c330f0b16020a0adeea8ddc94" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.15.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "f7a6a319b05328df07d94a727db63658" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.15.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "4cb0c508ef18fcfaf34774418d9b31bd" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.16.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "66a7b1bcf7e828bb62c551ffbec9aa2f" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.16.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "f418e128ce2aff2eebfabdf88d7687a0" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.16.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "11e5ddfb78e3baab79150ce715d3fab6" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.16.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "63068b9b095c062964765b9efce06360" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.16.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "ef3b64ff9bd066b1f818b095653ce18a" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.16.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "1f60dfc1653b167dd836a459664bc322" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.15.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "15bf01e89947cb2262f017307b9dff60" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.16.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "9ffed6dc5a13e32794b2c8421b0b937a" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.16.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "12f143ca4a5c830786f792deea273b93" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.16.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "a4368261f7950c9ed9ab9198ae0ec32b" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.17.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "440f44f6fc18ba7e9555907e9360b205" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.17.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "a8c66196399af6a3dc2076cdf540b0e5" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.17.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "9ce48eb9e823f16d8205f9f1cf3b815d" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.17.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "b5ca6c140d962032401bdb167990ebd2" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.17.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "5568141c8f971ae510aa954d9358275c" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.17.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "0746f3bafa1f300bf50d37a0bb383ac9" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.16.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "2aa85fd11c08e856d805eeade3bbbef1" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.17.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "7f1235b932ce15ee572705d3d0798ed7" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.17.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "3ee971ec43a4bef296eba6d541b6b89e" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.17.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "3e28d306d60c14fb3465a6a3f2a5b740" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.18.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "dd1805aa82bc92fddc893650ccce7234" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.18.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "e1d6246a409da49d454986caad0722b8" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.18.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "bca1ca4b4bea715489c48a5a92107146" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.18.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "960931c0eb725f92457888c5fcc30633" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.18.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "47acdd1c2897340fb78654ee948b9fb8" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.18.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "4b2ea41df231bec4018cd39eddf4fef4" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.17.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "243f177d24f8383f2e0ca1b185c011a6" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.18.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "bfdf06a04d4fd1bf1bcf58fbea80367d" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.18.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "324f144c18bf7f54b96d3f0662a51062" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.18.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "64638b9a33014c3eca75381605be8d70" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.19.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "1affa2461b424b5be6af51e6f4303dd9" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.19.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "399209d73429b36221c1a328d3ddb60e" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.19.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "1f0d640571cae4cdf07457f91b2e1848" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.19.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "932aa50041ed12828e835c2402005c8c" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.19.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "5380a73bcf1219455e7c46bda66f0b36" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.19.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "ebfd4b815cffae29c650d40d9fffa064" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.18.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "8a316173ebc82f342a6dedb3ae9f964d" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.19.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "d0dec70aced8b1f9b020c6ea260715ef" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.19.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "798898a26d4089344300fff45078110c" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.19.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "10a2ace05849a99016241ee334bb0576" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.20.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "f89ed10e76fe7405835eebf7dd3231f1" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.20.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "2930725479287cf4583564ee04a86cf6" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.20.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "f6ff917fc241c271d808cef73d43b489" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.20.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "ac78e6ea71866a3f356a6e28a116fa50" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.20.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "f3b56c2b90d5d5d04946caefe392d4a2" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.20.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "8b7c756aaa345d8b7eb9f4b8e0c43589" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.19.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "9f912ef7c53f435e80100feae56b6bbe" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.20.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "ed8d4d945f5201bdb7d4542216bb1e69" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.20.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "d41f7d798dd820a289478eb6c00e7193" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.20.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "13af6a946406559af867660cce680928" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.21.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "cf278a837693a8478a8200e5558b43b8" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.21.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "8595467e986c05c6bea43d0456c7c7c8" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.21.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "ee2d5df7de0e52508c972d5a10fc3123" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.21.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "e71a12f647296643cd89d72b185783ce" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.21.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "bbb12113ff2356f1fdcaee23f52c47e7" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.21.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "ab616b3ce6f5c3584be792ee26dd677c" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.20.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "e4b887c19853f225b8dc1e719561f6f1" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.21.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "41abc098bc5db59a5b7d26f72820f054" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.21.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "c07dc3eecfcf879d7aac734fd63090b4" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.21.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "d125c124fd4a709ceec0188d70cef4e1" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.22.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "6dc047a6e0657b9d8228f6a6b12aafad" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.22.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "b70eb2cd6916c0c4d2d9c35e3bf33e65" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.22.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "22d65b51eaf270fb60c515b64a52717f" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.22.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "216377e8d01a01b642645df274e70c56" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.22.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "362bda4bb577167a424208d1855ea9db" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.22.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "8bd14ff62aeb602763e90fae8954afaa" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.21.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "8e026844a1ab9ea7677939d11bca5ba1" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.22.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "8fefd59cea829729f48614cc1857f714" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.22.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "95c72c0a7ff63d92101da95f2b457d20" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.22.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "0ccc02b2b29a7b90d4df8cf592c5fc0f" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.23.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "fe6a032795932c0ab91c709e62ad35d9" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.23.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "8ab384e2b671d98195329ee0957da0e4" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.23.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "0c4a3045f99e95389780655a2aad9b88" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.23.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "60545d677854a67a634203ec4702006d" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.23.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "1a7b61effc32e157c1a9efc161d2e868" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.23.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "e09a81c16f14f2cdf9cd221ab7fe33e6" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.22.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "2f8abed4811af5fffe2a9c29280efbba" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.23.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "a985b271e18af10eac4fc3d47b96c20a" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.23.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "71ef3df85e07dc396c49399d76407fb3" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.23.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "8471dfb0d4f926626346c55372d5cafe" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.24.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "59196a590d9cf5317777994b88c5f9aa" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.24.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "dcea13a6c4f96a39036926708b0f8441" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.24.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "737f3fdc15447396d08fc5a0a6918a85" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.24.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "ee3e5b955bac15db2e7c04e883811df2" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.24.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "a2da1ced52f711fb349de4f7ed740124" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.24.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "e590ece0ae6440bc21664863437535fd" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.23.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "f7a0f8a306225ab9e1e61c292e7bcb0f" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.24.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "be242658808957b064ff94f40e039efa" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.24.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "cc6af8814cee7a4d3a9ab12acb967557" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.24.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "b7c34f72a66a4a24a1639bb1fe7f61a5" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.25.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "154e6ae2a43c39b41cbcbf9ddee85c02" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.25.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "dfe9c7a865ac5dcc053eaf9df916b0a6" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.25.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "bfc7dd4978ec14e7ec49849879ad9be7" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.25.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "a9c966ae9974467152322fbc1b069cb7" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.25.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "9b07c8653efd92199626984fb2ef4535" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.25.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "013a4670297e1c9e86f05cde63c103ac" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.24.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "4ec1dabc801441082f36bee0dc47f770" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.25.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "e99cbf0b1b34a41d714b42acc3a793c7" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.25.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "065bfc3ac0ecd3a1a45c0a2bfa4f2a29" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.25.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "5fe512c5ae88523599368c473b58ca1e" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.26.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "ee7d8c03689cf9988a908769247c6d6f" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.26.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "52efef6146397826ae46430183227c66" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.26.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "bad645085fc2ebdd3fa39d36b1e3b817" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.26.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "807be59745497acb86e2e25d4ab7d97e" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.26.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "5b5d35d7085f4827e0d81496e3184deb" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.26.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "d78636b1102cdd188a005087c5de46a2" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.25.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "7deb2606aff78c1581c169c683eda104" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.26.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "886daed65de06c94bd91fd06351513ef" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.26.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "7c29f2d5370c0b4a44cbeb008c0feb64" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.26.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "0c8d45716576b3655284b4eccf7c7d0c" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.27.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "218d3e7fcaba6b01de81fa4c5095ff2b" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.27.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "f03f042a72e73708255de29ef6d9dc86" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.27.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "584a53bd8644c4279c49209e2598bae4" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.27.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "53626a2525f6593141ade1998c3a28c8" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.27.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "c3698301c3ee98049c040e0dc2b928b5" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.27.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "e599575a0f7d0a61751b151fe0c8b68f" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.26.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "aea424c3b9ce87f00b5bd85fbb62703f" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.27.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "82b7cd13636cb5e33289e070216a6107" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.27.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "6425030ea68b80dcf9ad93c83cda3dc6" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.27.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "524208ce43374184f383e6dbe1c70220" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.28.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "4b6c094053b7185878af1e9dee0cbc2a" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.28.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "7fd95a999ae516b00c1a635d28593764" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.28.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "1f996d22dc8d53f9ea97cf54dbb74f92" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.28.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "3a09ab7a2f492ccf1afca49ecd87b217" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.28.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "dc4d3d6a019ce2f879dee445a3b4aff4" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.28.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "9d771b439f220a23887f22a37c7dcf9d" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.27.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "2562dd7cd77913803c1dba0b40cabd79" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.28.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "e2a84bda42afc574a45e9a4118c5f5ea" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.28.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "983f4e42685ca35d733c1e856f8d481c" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.28.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "bbe33b4e740a5459a1313219cfc48fab" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.29.mlp.down_u_proj.weight", "shape": [ 4096, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "e2154c051653f4175b50536030d2457f" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.29.mlp.down_v_proj.weight", "shape": [ 2388, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "161f54e64d1986c035646f5fc929a116" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.29.mlp.gate_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "463ead176ee471d2ef5af751b10464c0" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.29.mlp.gate_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "fcaab30b99c74534131009c8dd21acfa" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 52574208, "records": [ { "name": "model.layers.29.mlp.up_u_proj.weight", "shape": [ 11008, 2388 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52574208, "byteOffset": 0 } ], "md5sum": "4748dcca3b0c92400ecd8cdfdce4edf8" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 19562496, "records": [ { "name": "model.layers.29.mlp.up_v_proj.weight", "shape": [ 2388, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19562496, "byteOffset": 0 } ], "md5sum": "964a7a2ad24ec29a7598273e13fecd65" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 26853376, "records": [ { "name": "model.layers.28.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26836992 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26845184 } ], "md5sum": "6cfa531f18b011bf50f7d902a8b7159c" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.29.self_attn.k_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.k_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "b86c4c7950fd3dd5067c3983b1617ebf" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.29.self_attn.o_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "dc8ff47bfd1f3b4ad5b5842021e7d6fe" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.29.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.q_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "ea639b4491487d7982774c1fcdf80de8" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 26836992, "records": [ { "name": "model.layers.29.self_attn.v_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.v_v_proj.weight", "shape": [ 1638, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 13418496 } ], "md5sum": "9750c2b37a68cde9aac2674c360d6166" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 13418496, "records": [ { "name": "model.layers.30.self_attn.q_u_proj.weight", "shape": [ 4096, 1638 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13418496, "byteOffset": 0 } ], "md5sum": "ec2a3078030db362fdb2d7b48eaa96b5" } ] }