Phi-3-mini-4k-instruct-q0f16-MLC / ndarray-cache.json
mengshyu's picture
Initial commit
5851c13 verified
{
"metadata": {
"ParamSize": 195,
"ParamBytes": 7642159104.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 197001216,
"records": [
{
"name": "lm_head.weight",
"shape": [
32064,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 197001216,
"byteOffset": 0
}
],
"md5sum": "48a04baa06c8140d1ae4fd61ea2a51b1"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.21.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2065d9f788567547a774f5ffb9f1131e"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.21.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "cb9728ad4fa6db615b1caad12129df66"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.21.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "93eb83c12f0c5e8a7c3108b394c91f84"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.22.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "608636bdc84e74e9c06edc92b184ad51"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.22.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "016c70a051cf51d54861397f6e389ba0"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.22.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "6fda1306b81ea8a098567dc6d73b273a"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.23.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9fbb3dbb48f84112a1cfdd1a3dfc1309"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.23.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fe08a9496e9398f79dae924ceb0067eb"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.23.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "50bd13a3e1b4f4f677030aa126d58003"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.23.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "a4bda16fe3cd9cc9a45a679920864915"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.24.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a7c2b8f5bc96ef8751dace0f4943eadd"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.24.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ebc48a19827d58234b2c8bf0681a127a"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.24.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4b73dc6122a84c6529a5f0ba77b6f9df"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.24.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "8067191930fe75a1d628d5cd61a0bd37"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.25.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4600584c01f8b59b15bbb39157f83ea9"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.25.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "dee6947e469b73541a20200c4dfafc60"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.25.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "48e123914a2e6b87cf0f84b551c39274"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.25.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "c97246a9dc9042ec19934b2cf8b30594"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.26.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "cef6f7e1c2aef1f38f38ba9f0cef500d"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.26.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "491dd3fab5b55ea5b79732be5ac73ad9"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.26.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0cf916f8ba3a5b21acb9979546b353c8"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.26.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "a09a3bb9780b39448747ff97bfb01f77"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.27.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bc70e157266b370a65b2260fb956ef05"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.27.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e3049c1414aa0312b11252bc0df8e012"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.27.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "a3cca75a3b6fab52c9e6d2925ed8c740"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.27.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "a8a572beb8324e103d72a1c492b447c8"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.28.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "838f4f01a65960266efa7b3dcfcd6f62"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.28.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "98d5793d5b479999176cc86adf256624"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.28.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "54d4c13f9f91500f02e4e550a952fa5a"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.28.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "07cb6d2348769db041143c3e0774ecab"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.29.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b292bd2096b63ce0b3a69d8017346d07"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.29.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "710dfac38663642c90b81a90955e53f0"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.29.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "184397f1a9cfa93527bd0b47bfc0f4d9"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.29.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "c0b0cdb47135abd8be76c8cdf519687b"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.30.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "26b1c36720773b1715560a3380cfe5e8"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.30.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "65ab4c77f8d88808327b115184c44056"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.30.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0a969f022b81189c9675ca624838bb7b"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.30.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "1f1118702f5f96cb08d2eadef0cd73a5"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.31.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f6391b80aa186aca2df8d816b3d5d20b"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.31.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "cc354c852575d11406c100e4ecb6a360"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.31.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2f75fd301b667261b7b5cd83e306c61f"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.31.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "e0774dd3e2453472fe59ba3797255c73"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 197001216,
"records": [
{
"name": "transformer.embd.weight",
"shape": [
32064,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 197001216,
"byteOffset": 0
}
],
"md5sum": "2794efed4f90da2679835636c49453c1"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.0.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "aca7213bce912376770af1f9a7805546"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.0.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "7d45b5a1d117d0fb2f97c2566b85c080"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.0.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "b29c48242ab28d7751664fd917cb53d9"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.0.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "43ffb7ce1c9bb16f42f8cd92533105ac"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.1.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "fba6c01b38085811a0b169a689e9a8b0"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.1.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "754739f276a532078fb8adf8a358a84f"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.1.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "b08e5972a3ee4dc0a5d2446c74b78667"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.1.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "6a326d18c9a02a0ff844de3209d9d239"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.10.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5559680d21364578cee8b4f3b884c387"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.10.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b5a2560d04db862c4b0b44d480b3de11"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.10.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d7b4d5eb54d90c8b88cfcdcb7ab7a6d3"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.10.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "a2f33fa311c565fc0ad4b0b2ac9c24cf"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.11.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2ea00513628cede454214266723b73fe"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.11.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "6f031b6a01f57cb949b87939fe3a6fcf"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.11.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "7629386137cde4883fe6092961628908"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.11.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "74f94e083aa4730d2e928a652bb93aff"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.12.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "fe1dcf39db8e8c03468b5714d7574f1d"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.12.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c55494e7965fcddc67886f2719fa4f28"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.12.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "01823150ed94ebda1ac03ff514674d59"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.12.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "f7606df9f1f8a7ae529f3c3d84812cfe"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.13.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5efd0889d320559ec7d2587d88560310"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.13.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fde3986982d33d33d6f9033954c4f76b"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.13.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2d6c2ac2c20271403136de2b690e2c10"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.13.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "f3e6c1bc7e706a4ca274baf2bb296a3e"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.14.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a6206e415bbb24b99496af3833bde153"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.14.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ce3c652a54a9d59b42dfd107c214fedb"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.14.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "08432e0cda4f3d4efb5af8226d292545"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.14.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "4da8e23a88baea7d245115482c783b88"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.15.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "61fe1b44bf673ca96c6f5febd6dacaaa"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.15.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d03015371c3aab576d7c554299a64a19"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.15.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "ba6b9594289c02e4bd8405905e3b9345"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.15.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "30b9eab0df51695103e0d2c8281853d2"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.16.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a87a68606ce19491b283ebfcb4f261e1"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.16.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3bb222858ea27693a823cc6e86b89394"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.16.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "aafaa8e5e6341a8b186315f30b33eeaf"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.16.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "c0ec806ad19c5f93174e13e77343e9d1"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.17.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "fa41b02a47344ce12ce2462ba41cddae"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.17.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8a5537e9896e145836ebf99dbb1d617b"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.17.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "56aff7c581b6f3aa14835e0cf733a113"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.17.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "bcb98fa15aca763650cfb90aad95ff8a"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.18.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3d3b925b996f918f06902cb77831e82b"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.18.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "006e594d0ec96f0a2b3824eeabc4405b"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.18.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "73d5f7100db12528051f7f19aede3d60"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.18.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "9f9e0dad8db22348008c009241057bba"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.19.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "64cba4f187a6c45a05dc06853132d7c4"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.19.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a10d189b10ace93718ab187f3f78b402"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.19.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "6a8801a5037374d1317431d8ed723218"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.19.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "329a57ff77416e0507f6fe9e662681e7"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.2.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "40578ab77b8ea1571d74b9a25d46d821"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.2.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e88194e0aed044702686ba4edd832d5d"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.2.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "baca8426aae056bc7a3ed32f5f7fa3b1"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.2.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "43077c2fdc9511c9bc7750d89ad53321"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.20.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "16c500aee75a3f6fadfb79388040ce59"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.20.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "35e566f071950f559d9a68ed651cdc0e"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.20.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bc4c11294dcef27da24c0d0c41cddb81"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.20.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "6c09843d6224c0c491c11052703711d4"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.21.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5301ec982f0f0bee7cbfd66c5f10db1e"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.3.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ad69d774f1ef8d178b241ed77788034e"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.3.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "71b49e1b1e72c73d95a73a80ff85cf56"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.3.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0a14557ee27469ea0c91accf21f1882a"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.3.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "1f612196539cf259ff0ba018a1d3fdd8"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.4.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "125b30cce73232acdd57017b751b8ec0"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.4.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "174b431f56124195a88de449325221d5"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.4.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f47846816d3009156669ef65dd815b9c"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.4.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "fc5eab9b081abe8d88f8713d7340a33e"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.5.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "57feb42a7dad4058566e19deebdacfdf"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.5.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "122ada6175d0fb97ac9bd9c1c28f4a83"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.5.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4c27a252315d601d17df5b45c4ed394b"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.5.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "73184c595beddb967a0a1fb4adb8a697"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.6.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e4e3eed244b4ffe0542a0e6e950d5c23"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.6.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4f529e42dabff29fb4432f509073c9e0"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.6.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "48c4aca8ffbb252d17c5f5effb09a532"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.6.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "8a0f2cc6cd07a2208027ced9f52c3ad0"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.7.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9657e1f37ef7a3a950ff28e1b1c8d288"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.7.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "6c071534349e4c2b500aa75b5f207769"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.7.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8e49ccfc80aa0ffe5526847f8c955f83"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.7.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "db54d2b3c23b16af51bc756ecf127307"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.8.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f9ab4d31e88e01d5d03b3cc9de36a71c"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.8.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "13f0af026f446e192ec681cae51413d4"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.8.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "084906da91b4132050bf217661c40b19"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.8.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "e5c8f3b8d7bc024599ea7c12e86e12b6"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.9.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "eaad0fe95f377541e343e2b65c526166"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.9.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f32f91ad037860d6cd65911faac9277c"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.9.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "442e739c1bd8c0ad4a7528a629d4f556"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.9.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "cba65d66e4a5aa25c15a78e798ac7d5f"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 19273728,
"records": [
{
"name": "transformer.h.21.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 0
},
{
"name": "transformer.h.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 6144
},
{
"name": "transformer.h.22.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 12288
},
{
"name": "transformer.h.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18432
},
{
"name": "transformer.h.22.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 24576
},
{
"name": "transformer.h.23.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18898944
},
{
"name": "transformer.h.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18905088
},
{
"name": "transformer.h.24.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18911232
},
{
"name": "transformer.h.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18917376
},
{
"name": "transformer.h.25.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18923520
},
{
"name": "transformer.h.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18929664
},
{
"name": "transformer.h.26.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18935808
},
{
"name": "transformer.h.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18941952
},
{
"name": "transformer.h.27.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18948096
},
{
"name": "transformer.h.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18954240
},
{
"name": "transformer.h.28.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18960384
},
{
"name": "transformer.h.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18966528
},
{
"name": "transformer.h.29.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18972672
},
{
"name": "transformer.h.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18978816
},
{
"name": "transformer.h.30.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18984960
},
{
"name": "transformer.h.30.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18991104
},
{
"name": "transformer.h.31.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18997248
},
{
"name": "transformer.h.31.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19003392
},
{
"name": "transformer.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19009536
},
{
"name": "transformer.h.0.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19015680
},
{
"name": "transformer.h.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19021824
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19027968
},
{
"name": "transformer.h.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19034112
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19040256
},
{
"name": "transformer.h.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19046400
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19052544
},
{
"name": "transformer.h.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19058688
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19064832
},
{
"name": "transformer.h.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19070976
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19077120
},
{
"name": "transformer.h.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19083264
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19089408
},
{
"name": "transformer.h.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19095552
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19101696
},
{
"name": "transformer.h.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19107840
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19113984
},
{
"name": "transformer.h.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19120128
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19126272
},
{
"name": "transformer.h.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19132416
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19138560
},
{
"name": "transformer.h.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19144704
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19150848
},
{
"name": "transformer.h.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19156992
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19163136
},
{
"name": "transformer.h.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19169280
},
{
"name": "transformer.h.20.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19175424
},
{
"name": "transformer.h.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19181568
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19187712
},
{
"name": "transformer.h.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19193856
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19200000
},
{
"name": "transformer.h.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19206144
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19212288
},
{
"name": "transformer.h.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19218432
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19224576
},
{
"name": "transformer.h.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19230720
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19236864
},
{
"name": "transformer.h.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19243008
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19249152
},
{
"name": "transformer.h.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19255296
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19261440
},
{
"name": "transformer.h.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19267584
}
],
"md5sum": "f066d7c15dbf7a42f65846ff50a0ae06"
}
]
}