Qwen1.5-1.8B-Chat-q0f16-MLC / ndarray-cache.json
riczhou's picture
Initial commit
c832d6f verified
{
"metadata": {
"ParamSize": 171,
"ParamBytes": 3673657344.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 622329856,
"records": [
{
"name": "lm_head.weight",
"shape": [
151936,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 622329856,
"byteOffset": 0
}
],
"md5sum": "83c85f622c9c298d6de5cd6d58789736"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 622329856,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
151936,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 622329856,
"byteOffset": 0
}
],
"md5sum": "206ebbfb149647701dbeb218436b7d8f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "09c446e956ab6d4a147bd40f2de7bea5"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0292eee7b82a5b618c0ccb911837b204"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2bc2c7e7d9bbab5dae35f6341df3d5d4"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "223e4f9d1bc3272485ebef2b95d1f1f5"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b6d9f3d75d83c86c7a34fabaf5a622dd"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 30973952,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 4096
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22548480
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 22552576
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 22564864
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30953472
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30957568
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30961664
}
],
"md5sum": "64a2cf6c36a6ef5aaf9da3622d3eb666"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5facbe96fed8417fd67054abece53006"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1511e9775e7b1fb03f1096d48b60301e"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "801c9302955ade10a6a38eea1cdd663e"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "79cfed7dc713b299f61b18dee20376dc"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2839deaf8ac8792833e0a170eb677773"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "56dbcb95902d10b2165a68fdd689a853"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "750ea469ec1790dd12fc5df63245f36c"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "53d7deafe41c7cfd26cdde144f001b84"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "d5ab1c2914b7b5fb9463ae06c17ad62f"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c818bbd34597e1adc930a21bc1281901"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8cb5f8dc0b1df94cdc4758972b8bcb79"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "81ea46aba7c4040e488eb1f53eb85395"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c3fe540355374d554e380b67b9130e6d"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6acccdb7b8091022a954b0629df7a8d7"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "1c4e8ee8dfc04f527e3b04441cee0621"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "445a73f350f95f8c6aaf470a0648aa41"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b5f6ab967a5a9748e5b9ae7a939352bd"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "d7692d14fdec212853e321f5849025fd"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c9d31c64260450a8c520a43ddba6ccda"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c1404d446281f41b4ab60e586ebe42af"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "33d7615d5499f44e57ec03c95ab2b5d8"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b000699d16122be07193b657fce01e3b"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7edca338357f18752de1c9a894e7653f"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "9854d2759e328a0230b7d8f7623dc54a"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9c8b9e17393d828088467705cb6f6825"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f37dd3700560972a3e624e0ee4beb0f8"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "7a8438b7fdb6b3fee35d8372f1f9df80"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c376d9da8152e4b47d955418afcfa6f5"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bdc65263820cdccb8af74c212d500cf4"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "17316b02f50d215eee2c183605a28455"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "a6739cd336bcfca9fcf694f4c859e785"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "52be86752d8b85e4eab1baf5ff2dc06a"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "45412cba0d7d92875a4b66dceaaee000"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f376a2446673e2884216fb480defe9a4"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "58e5c1da262bdbc658646cdb25586e50"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "9cb5fb5ca40f3c44b0892935edb590c8"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e8dc7a080286b770f450c7b8b9233d31"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c3a070435a1738702f4fa97639b1295f"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "dc5afcb289c1193402fc5a0d3ebe3b1d"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "1e6b00aec599d853df3baa265d4a1690"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bdb912d45a0626cb91a0da0abbfb84a8"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "de46fd346a15c41b42e67b3d0ae673c9"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4cb92d3031b8a8d7e4944d6d6d26af3b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8b7e38cd6f60865a1db8034d27eecaf0"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "8f04f524cd1d8946116a5751ebfbc85f"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "401108ec17a0721237c7893d86405968"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "131f98baea5d8cb84ede2ad0bdad81e4"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "8c25341395fac238ab0fe7eb58432000"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "1b4069779763ce8e9ded3be2093aec82"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b7202732924be475d50445bd09148ebc"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "87d54674433056fb7111710ce9ee1da1"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e6d8a89c39719c8894be8677b84c34bd"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "914748c73b153225064d5473a71bf480"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "194c16b3e951936f2662d8060d5f7163"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "0d2929123d475dddbf5348e7c3bca32e"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4c16d2f3f5523fedb224ee355179c4bc"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "a4ae8bed4420f2ccc51bd6d26b80d703"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ac79b2cd07190cc8f27f6d39447d8008"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9c74c6e83c316439766dbabd9b4bfe61"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "189cf5ee330a2d298e3bf44a5d79c926"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f82bde1caaf69d79b46d16c9f3630470"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "04b1b10c63c030e8b4461af7528fddba"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "dd4892ac9920883590d988c7a442ec0e"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
11008,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "396e3d7bfa6e3f6cda19abbb40b9c81f"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
6144,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a9fe1c41e97698293f83322043b116ab"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 30953472,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
5504
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8392704
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30937088
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30941184
}
],
"md5sum": "9c66125e43d4a5db730097c03f9afc4a"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 8392704,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
}
],
"md5sum": "9139274d063a3faf7d47ce9aaa9431a7"
}
]
}