internlm2_5-20b-chat-q4f16_1-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
1d3d57a verified
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 11172753408.0,
"BitsPerParam": 4.097919499366067
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b9b9e5d249b538a2a23cd9d6019981ce"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b75a004c453f62cb53310dd9f39a4e50"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.0.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
},
{
"name": "model.layers.0.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18874368
},
{
"name": "model.layers.0.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21233664
},
{
"name": "model.layers.0.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24379392
}
],
"md5sum": "e6520c88c12f921f441acee5398ff07c"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f3ebb9920e8b85c57da98df48847ce9d"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 284295168,
"records": [
{
"name": "model.tok_embeddings.q_weight",
"shape": [
92544,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 284295168,
"byteOffset": 0
}
],
"md5sum": "ca14145a0c0c4c397f63d05d3efc2835"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 35536896,
"records": [
{
"name": "model.tok_embeddings.q_scale",
"shape": [
92544,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 35536896,
"byteOffset": 0
}
],
"md5sum": "0ea7afeba332ab1e5b9532773d69a3b1"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.1.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "77dfcb6bdbf516add8ecfff6d44965d2"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4d57d99f602e5e7564264495c55bf3b6"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1692de5d18f15e7c01bb91a26dc0230e"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.0.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.1.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.1.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.1.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "98e4b0820b49d5bccf6fe1dbb8be7ce2"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "72a985405bb3d53f60b97bf1e2332e2b"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.2.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "cf1fa92d5b1a4100c6549a2c884b4983"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fe5145ac7899a1d1de150d2ebc809cff"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2034c0e4cf1503838051c15fed1a11bc"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.1.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.2.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.2.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.2.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "238d86afda94541e05ffba291879c288"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b038ebb996dcdbcda9e26155429e364f"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.3.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e9f17c0201ce0717438d68866358cd96"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8fbac1aab61a4fc5a7c43923d2318165"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0937017c79435c4e9fe98bb3e03f40a6"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.2.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.3.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.3.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "a36f20dc7a6ffe035aa2583d09804c63"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 12582912
}
],
"md5sum": "2bba0fd99b16bc9c746f856e35ebdd73"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0ffab72d9b34516e2118eace3282a859"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 30683136,
"records": [
{
"name": "model.layers.10.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.10.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 2359296
},
{
"name": "model.layers.10.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 27525120
},
{
"name": "model.layers.10.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30670848
}
],
"md5sum": "721df0faac45e9bed34d03d5722df4fa"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e06a46bd4ff4616470764256b08ccaa5"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e94c72f18607594bf53fca9f8ab6e9ed"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a2b1f939810b4c0898f0503237eafac4"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.10.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.8.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.8.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 18898944
}
],
"md5sum": "c8057dea8cd1821e44c0c9dd91c41d3a"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.9.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 12595200
}
],
"md5sum": "1466895e1c5cbc65be0449eae1ba0765"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1c148c5952261b2e18fd0a68e1edd1c9"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 30683136,
"records": [
{
"name": "model.layers.9.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.9.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 2359296
},
{
"name": "model.layers.9.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 27525120
},
{
"name": "model.layers.9.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30670848
}
],
"md5sum": "2113efd5882d73352a9dca055ccb605f"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b4e775cd3ccdb408b60c073ade853922"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.11.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "97fac31ac46ddf3359658e56238b008a"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1b10c5106b6f876782ff41ebcbb8c55e"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fd7a2553ea053177b9c55ac60d7fba99"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.9.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.11.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.11.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.11.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "d7984a5f3fe1753cd27283f9d0159c6b"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "02fae08b842e37606de9c803d5488f48"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.12.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "aa42bea3a653e85154609159e5a73fcf"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fe075a318e0f8b16a4d2a8368d66413f"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "45c07eaec64f198398cffa978d9a83f5"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.11.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.12.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.12.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.12.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "162ac15e1b59aeb7d672ca7ec7fa4f42"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "be62d70497cdc602815537e2b6187b40"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.13.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "3a2b112ed21e5a0b5420a8c62d458bca"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cd5d417be0799a24a579c9fb626146a7"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ee35ec38f8c514405574ed5a4f3c0d1d"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.12.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.13.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.13.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "f24ddbaf32a5d7f5b71c7e5cee8d01e0"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b5aa78602d686a8a798ba8edc5ea12a8"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.14.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e70489449fb9987fb1923e5ae5f20f3e"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2cbdca22297f8fb2492e184b2498c5ef"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bf26d2932c39e61d8f16d72d1a1007cb"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.13.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.13.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.14.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.14.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.14.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "4c345ead3a6d1626b0fa94d39f760f42"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "05ed145f96a749f1e5485007ba9bdff3"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.15.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "91b196f1ee5b0c9fc7eab50972cfb94d"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "46190f54ec12b2d53e0f4a39ff7606e0"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "65a10a2088cbe633298f90961d3008c4"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.14.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.15.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.15.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.15.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "5422bdbf876020a942b0ae48542e997a"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "40d406373edcfe671148f51c58188b3d"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.16.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f36308450b85047579030599cd4807fe"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5d54b81a65ed98f8cb420b3ac5c810b3"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "92128fdd9d2ce3d54b5cf38b9bc28391"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.15.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.16.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.16.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.16.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "e6888c0a747108846cc9f2eb2cb2c98a"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "246725ecc77323925205f98ad2ad48f9"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.17.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bf703b923c7bab71f5bf4e4b5f892a76"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3d97de2553397e1bb18768648aa64868"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "76dcedeef122b7058d63692923aef750"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.16.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.17.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.17.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.17.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "47a0c159a9a9a3fb72c2b811faf05d0a"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "490c21a440f5e1714261de6a9549760c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.18.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "db9a510efeca4751586b1ba77c862e90"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7b040c1c18f4abc6b9a2e61411dd13b7"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "38dccce8fae7230bdaf6908b657314df"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.17.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.18.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.18.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "14a71172fc59a14a8f2f3c5e9b343fa3"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b09d8cac1fa6217954b45c6dd9bf8a5f"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.19.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "7d623a78548db9de6ecb0ba26caf9473"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "830dd81fda54b730aa38974808d42f8b"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ecdcdee88ef207c0edfff16c4963c792"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.18.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.18.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.19.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.19.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.19.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "af9790c05f42784abfeda3221644f278"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "353670f439163af331a12230bbb91c31"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.20.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "68d29e416891959e83934fd381c3d5d5"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e68baff97678fedcd3690b52f3570b6f"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "23b1527e1c6ca325159ad712bb0d9392"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.19.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.20.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.20.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.20.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "43e154db72c887328011b222eddc1d55"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.20.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "13717ffad5b2cf7c094193bde6a283d5"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.21.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "37274533a84b02617807533fbd266104"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c2f2273008df8d15839af1d97d24dc1d"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "31b73de45831f78b149432353e767fd1"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.20.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.21.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.21.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.21.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "3de2c2a06fc1c7ffe7167fe61d34680a"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.21.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "86af285350ed289af29807476b3f9b97"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.22.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "89aecee669e406a982e7ea38e48d5e5b"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c7d7401860f343773355a3c115f23f8b"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f7f068722b55e80671ec8d759209c9ac"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.21.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.22.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.22.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.22.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "00d5d456e1cab93959fa3cd1714fe941"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.22.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5ebe811f4c8b0122fcdc1060af17df4f"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.23.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8471703fa9b94b2c3fcf5eba6455dd60"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "76f0d2a2efcae8fa029a95f8ee24a246"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4a6b1934376dc0218dbac30ed21a9d5f"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.22.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.23.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.23.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "f44fe42d3a651dd8833e7bf0e00bf3a9"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.23.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ac1dc6aaca1c4d25bbd6a4af1511ff10"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.24.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "ba7c52d984110124ed7f4baa601ad9ab"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fbb39581ba624860e4669edfba7247e1"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0399dc767317b8c3adc8f4ae04f2e113"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.23.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.23.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.24.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.24.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.24.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "b5e0a5bc5eaaf33e740699713867945f"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.24.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "abecd49f7941e61eea6b8c100cbcdc14"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.25.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "64399e2619d7a68fd741391cc395cf20"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5321c22526b257abae7ace77691f8f25"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fa386f927b426e09c683e2c0d7a6af69"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.24.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.25.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.25.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.25.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "e99c9703a632dfafc70450a02c4ecd9d"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.25.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f0a979ea49807bbe454f4a697e6a6af0"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.26.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "a0f3fe8acc8c07a35822ed17144eccd7"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "22b9e6d91cb37f08c0f50dc0b336a60a"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f40f12f4b4550c84248a2efc1b7bbb36"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.25.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.26.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.26.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.26.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "e635be28861255c987c0b2f55e4811ab"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.26.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4de9a0fa239fabd4ea050e6ad13eebdd"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.27.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "05170ab9db2c8aad45f180ea988722e9"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d46ebfefe27eb12184a000ec9c408610"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b42151e035e34fd5211c53d960d6b8b2"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.26.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.27.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.27.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.27.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "2dca9dd7364013a355cc0cc816eb51ab"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.27.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0a2c75767433aea6df65cbc53bec2167"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.28.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2b59db67da1499944f0ab0a729bd7154"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f28b8d9086d55030b4308cf42bac76ae"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "345efd5241119907c753fbb3db159cf9"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.27.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.28.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.28.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "51f608415b50d03292e428f7346f8858"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.28.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4d589f4ebf7397dea54d54df9c60f02f"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.29.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2e9327a41378044126474c294689a3fc"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "593cc919dbc8eb0d75eaa59b0b3ef6ec"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9bad7a0e49fb4f8fa4147214514b4883"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.28.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.28.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.29.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.29.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.29.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "576d510787bfb1e7b012a0c52da43c4e"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.29.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b6352dda9da320bd8a71fd182f2fcf92"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.30.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8e83e004ea05000f072f5086400a0682"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9e2954aadd935b9e64748a49abf7202d"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2413d9696a13000692c530d69ceb5f98"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.29.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.30.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.30.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.30.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "6c4381ff3020c4b4bcdd27c3aa20882c"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.30.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "42f17b9c6aea608c2132b8a5414977e5"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "dcdbdffde2d4812ce4c25c4ab293fff3"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.4.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0d441147c8c220d184bc7a29a7c878c3"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d614ff9d28eab46ca9e2c7019486ef92"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bdd6f89340e69f6715dcaedf8aa83781"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 30720000,
"records": [
{
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.30.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.3.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.3.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 18898944
},
{
"name": "model.layers.3.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25190400
},
{
"name": "model.layers.4.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 25202688
},
{
"name": "model.layers.4.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 27561984
},
{
"name": "model.layers.4.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30707712
}
],
"md5sum": "c8de6ad9681b6e394bc8819849b03756"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7cad850ad5042fc7f5a9fc8cb0874308"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.5.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "915fc8f2006351af222d37a9044dde10"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "972f85a3f93ae5f84bff1da4fa5e0030"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b73ac7ae2b42939e5eea0eed24c0959e"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.4.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.5.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.5.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.5.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "8a319ed0157880eb0e20298ae24a9137"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0c4b8414a4671aaea6b15db3740089c0"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.31.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e7001ce4fc17f9de8ec82fae94ff06aa"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7a25b3067ac0d4e6d240b588ccf4d37d"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9da3d0628fb4d6bbd4ef79079a329765"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.5.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.31.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.31.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.31.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "055a8997ca2eee22cd74d1b45f01bead"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.31.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "24a5a4dbc97c24e14f5813d2d3203bda"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.32.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "78cf1b841c2d8bca6eed41273d6842e2"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.32.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6b4a25b89401c71edd3e3e9fd1cf8b8f"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.32.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "95ef384812d5bcb40783faea1485d38c"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.31.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.32.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.32.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.32.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "68289130bb93e6f4eac3bdf3d284b401"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.32.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a2d72dbf335134fa3c889848a0264f43"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.33.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d82c9855b12685d6de514a7923332cf2"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.33.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5834dae9f685eeb46df9ce91a4ae6f49"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.33.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "281aec78040f897d50bba3dc23421143"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.32.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.32.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.32.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.33.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.33.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "d11703fb84b835d9e1ee31e75753908b"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.33.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d48c63086b925cf7880dbb60170aed98"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.34.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0570540df18fc7cfcfe6417c2b976400"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.34.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8db337e43f10305f1dc9ca9f12999301"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.34.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "adec4a6bd21f3574bbd467bddae41451"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.33.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.33.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.33.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.33.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.34.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.34.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.34.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "63bf25fe91c0791e55de7b2e8b19a4cc"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.34.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "77f3f7ffb311e4b9a9ca02cffd053ec6"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.35.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "93d218d704627aa09e6795f93d91250f"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.35.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dc41359b12ab75c08ab15da606f1813d"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.35.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9e5e83ff86b12484dd4e71651e9c2a2e"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.34.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.34.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.34.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.35.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.35.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.35.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "06aebcfd006c39b66d096ec2ad7bd0db"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.35.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "96da9a8c9573fd0ce9e5fa3319b3ce79"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.36.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "18e21374c9266abf6a1893f6b19824e1"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.36.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4f732c8876e920461a3ac01bf1db04c4"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.36.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b6447ad2d5fb20fdbd4f2892b8a03f54"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.35.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.35.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.35.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.36.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.36.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.36.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "c06d8df77c349eb5f0e9bfa3248c6e3e"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.36.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "dede3e093af0ac37893fa06367aaf846"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.37.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "9ac03336871aa4003a8c40b5ad0429e9"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.37.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3e4004827b55b177a7ba54d3592bc1b0"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.37.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2481e0b4d019163e0edfe2ae03393001"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.36.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.36.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.36.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.37.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.37.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.37.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "0474791c9ced2b4366cdd93234349050"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.37.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "645222585421be65fa6708fc85743a42"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.38.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "79061c56e9d8f780488219cfb82d3d1f"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.38.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "46bda62cb8057c1f52ae35424526dc1b"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.38.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9662209a638db26ba256c909b99a34ad"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.37.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.37.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.37.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.38.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.38.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "48354f641c85a8d5988634844e168b85"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.38.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "00b0cb59d8217ae84612648a467ac555"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.39.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e5cbe25654de4a7b204a3bb524335f9d"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.39.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5478c9f09b9766d3b423114742828a6e"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.39.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "dd2f6f845ee3825755f8e566cc8d2c3d"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.38.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.38.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.38.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.38.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.39.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.39.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.39.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "84d4b73a97b64848a6474d89516de110"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.39.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "08e0d3541b27e6e0aac96c6fbba8826a"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.40.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "81ca8b31e6c7e67cf7a94ce64753099b"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.40.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c6eb99b73593a242a4a449193041a921"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.40.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f2cd3d7160965b9f6561e86558c5373a"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.39.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.39.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.39.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.40.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.40.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.40.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "46a45114942ac1536a5bfbbbb258edfe"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.40.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8d5c77f4235102f10fac0edffda3709f"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.41.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "fa4ab53f55f372541841f40b9e2a8cc5"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.41.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b95b1f1845e488c54d88d517eab36217"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.41.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0e87a108ec599d27216e682a6c819421"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.40.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.40.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.40.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.41.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.41.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.41.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "810fbd5a022bc83f49588f0652187512"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.41.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7c96e0abf603520439cc06e66713527d"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.42.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4cad6d7a1f0f0b6bbdbc63ad2ec9b19d"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.42.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a01c4ec3d3566a6ee9b565733bf1d950"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.42.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ab36738df336067dd4ca9128c278df1e"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.41.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.41.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.41.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.42.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.42.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.42.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "a9ae49484edcb9c4c6f49dc1987a9569"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.42.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f15f494aa91d4630037100b7cc223816"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.43.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bec4b8d962fd790d33b672bf28ade8f8"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.43.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "50fcebef2f22b3a70553d627748d7bde"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.43.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bc2cc08f2dafd46150c5e6fe11f557eb"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.42.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.42.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.42.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.43.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.43.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "44c8668d170eb42a9bdc808fa40e4e3f"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.43.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bfb97d805c9de9cd897d1482fb7ac431"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.44.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "962e406c298e9a03b4b816bc98b6423f"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.44.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fb5bae95e382db294d05491b63244046"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.44.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "58b8076966f4cdd9833ea641a0aba4a9"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.43.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.43.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.43.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.43.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.44.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.44.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.44.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "8cbf2ae05b0257047c056673f64aab40"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.44.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "78c64985dd1e09e3d053e3da668a8f82"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.45.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "1ad65acda5fbb096cbce23821100d0c8"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.45.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dfab68d1cbb76d655583d24c6b809c6c"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.45.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "7f3125368c8e829120b04168d9446726"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.44.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.44.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.44.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.45.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.45.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.45.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "e93d197cf530def9dc0ccd25dee07f71"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.45.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2b9e842bf09a26e1031090aee3f33e78"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.46.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "89ce957b587abf7bcca0f5f54b7092ba"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.46.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9429ed983670223155944d2a4a865031"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.46.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9d632c55535cdce5571ae9692a5e8f7a"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.45.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.45.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.45.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.46.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.46.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.46.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "be8fe44dfff8ad5da154c060124b5a27"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.46.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e73a84dba7df3edf3212aca4d43b892d"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.47.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "7a0791319f2cecc1731130321ccb01cb"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.47.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5c3e84b69f0c7a3a63157560fa361b54"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.47.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "67efaa8dc17af7cbe4a10922605975d6"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.46.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.46.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.46.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.47.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.47.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.47.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "ff36f2a1a4a79fd1edf4d84753c06fe1"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.47.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7b242ef01f8bc94265b14c88f4fb5a85"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.6.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4096c8406ed4e677033af46ad337d0df"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ed49904cb5632a1ebad2f2247c5d4fc8"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fc7e1357057d708152c38af23e367a85"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.47.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.47.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.47.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.6.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.6.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.6.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "40c0dc0ccabfe5f1d9f78b726a360031"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "cc7f3e7e05fdb6a0c73600ef94990adc"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.7.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "fde36954854070812585cb2c59d503ab"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "18a104c8923e0d56d349f11f43793f0f"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "7864bfac4b3dc9bffd17fced37cf9be1"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.6.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.7.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.7.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.7.attention_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "fb98f5558e6c59c07776b7b225f736c8"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6facd90a35565420697823941fb45f41"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.8.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "3ac01a84379a87490d93c3556016fc7c"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fde46571de8b1829bf5efac03918bd9c"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 284295168,
"records": [
{
"name": "output.q_weight",
"shape": [
92544,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 284295168,
"byteOffset": 0
}
],
"md5sum": "55f61b9267d7e0afbe4a1198dbd3d3b0"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 35536896,
"records": [
{
"name": "output.q_scale",
"shape": [
92544,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 35536896,
"byteOffset": 0
}
],
"md5sum": "89f25a89c637723b976edcd9b8abe8bc"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.7.ffn_norm.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.8.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.8.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "4be47898516de3750155af2c09274973"
}
]
}