mlc-q0f16-h2o-danube3-500m-chat / ndarray-cache.json
Felladrin's picture
Upload folder using huggingface_hub
8019b40 verified
{
"metadata": {
"ParamSize": 99,
"ParamBytes": 1027181568.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 98304000,
"records": [
{
"name": "lm_head.weight",
"shape": [
32000,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 98304000,
"byteOffset": 0
}
],
"md5sum": "4cacaade4fa9480fcb128b01802010bc"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 98304000,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
32000,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 98304000,
"byteOffset": 0
}
],
"md5sum": "128c3e824ae1c77e76b7bebec282b68c"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "73308bcc241ddfe9a276e1365343a9a5"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 26747904,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3072
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12585984
},
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12589056
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22026240
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26744832
}
],
"md5sum": "61ffd05d3b598a4bf392b2883678c6c8"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3e08d4f83d0234b8f04ee5c08686ec81"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "ac190343ae5018ae4ff4b3c72668e64c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d150ae0a002e81aa75d3a9869255318f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "729fdb8f0eda6fb8be72c549a265b99d"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f113f6c464d7323085f32cefad26d060"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "a94367f453649a530637805bb686bed1"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4c7e4e11421a7e78191585cf85e6f492"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "f7aff300446f28f284dfba1c059f1df6"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a1c8805615170e587880431364849a04"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "a2759da9f1b7863921b4196085a09084"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2da23d083df683c1dcae490831b95460"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "0d74164cfbfa518ba337ccbfd5100d28"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "33d9a7a41dd0f541307ee90b4ad70328"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "2f7d100b62508b23d809954b86c1ee2e"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "30c70e6edd714d2c85f9426157e8c80f"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "3fb2640e234b84defda32dfc1a42104e"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "53150dde82ef99fd9e1d6348d946f2fe"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "8d367d8f6d1b43767339847d87c0540d"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cfaf8058ee5a8a20f44eb27a0b7b573f"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "a165373f294ca4281380a26274ac9664"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "31865399476b425d4ccea3d19c707990"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "7cff33dad5234670929d6ce9d8f823ea"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "567b76989079f6b8afb9883c30f21dd6"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "3bd295d16385959c396de51b03ce56e1"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bee870077fa362451ffe490753151967"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "9ef1e19c27ba9772c11e5876693706c7"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "01d39adc4287100619cd94e0c84e4c61"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "3806098b62919ec3c4deb773c46d177d"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
8192,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6613b12dd755ddbe02dfd8e7640d1b6b"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 26744832,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
1536,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
3072,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 12585984
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22023168
},
{
"name": "model.norm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 26741760
}
],
"md5sum": "70dda49a243e5c0a7bf48b85b8a426a1"
}
]
}