KLM-hf-q4f16 / ndarray-cache.json
Liam-SC
Add Coverted
0138f52
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3790741504.0,
"BitsPerParam": 4.500454373872803
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "9ffd22847d99f8ff46488ccd88591e9f"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 30744576,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8192000
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8200192
}
],
"md5sum": "6bf9b9c19d9e17897d11fe3dd7a30060"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "40ba84715fad1d7e0cd3b903db1d1224"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a61eb898c98a99b7389a2f74e4fb8949"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a60870fd76262b116c01611a773f953a"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2533b45bfa168ac5ca321f3d677fc5bc"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5e8ad798ae3295d8f750524666a17813"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "867d1b0332f5faae38e57255ef33f531"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "a3c51c579d2839394aab68911e214147"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "28c195ebdc7c5393e0b3e1b0139ab09f"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c4af121376de0702802ad8d58797cb95"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "0d5de73cebc12ad8a0e665bab30596a6"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "a7dba97760f943a4b849b18b57086a67"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5a8d497c821fa7af98ccc6dc8ed7689c"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "7c220dc4cdf8f1d57d9e6d5a540b4a03"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "b040a857d13f91b767f0eabfb2426157"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "1486034f5f397f6a48f460a7df8c7e8a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a489d72a27671d7295a9f368f1b6a26e"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "992c9812023adedcfc8f3ef5caf58f09"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e7240105440eed571be6eece382c1c39"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1c980b8056cd894e4a70cbd056423f72"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "c84569c1a24ebcf27e2b9bcba102aa1a"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "08ccd092a2f69643664b8fbe2b69c876"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c41ddd36e63d558862e319899462ec44"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2bd8eea3051e8736465dce73c5c71e71"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "41d7a983d9975fc13bf49fbe9209dd6f"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "dd0b4507a51887a03adfcd40baa12179"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "52ff159f43be3ef3db14c3309016c9d7"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "41ed61742b34ea7d8233be6a5f8bcc2f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "0f21d5f46385d789c5cd29a932ca5c11"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3953dd5b674620016195a180f22408f2"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "976d89d944c4913f3c15ddcf8c623404"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ff57ea7af4075e2473e4d8dc5e97e5bc"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29253632,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 9445376
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 17637376
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 17645568
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 20463616
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26099712
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26107904
}
],
"md5sum": "4210e0749ac58c8a6fa59e5c6fda6941"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "83f01da95ed65d12b7695b149554e284"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ad0147c0c3f948e1f21d7745273d661e"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1eeb19e2936aae84830d2af1b56770d1"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "389a59414a3cb6b1d1089d6220ef274b"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fc212d784892e51d1b339da52bee06a4"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "65fde8605cd026598ec626d9c50853f8"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "ba59f310ac8d77c846402113ba1d35af"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "ab45854bfa4fb305fe3777ab3a5ff1ec"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b6c0752f5cfc1d02554f26dbdfbfad30"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bb0b45daeb838a0b78d8ad0085f12d8c"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2b7dee9443632e7dd5efc5e0a2aa2b7a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8ac503e0063b7ba6c964dd9df56311e5"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3e87e1686a0ab5e4ea8f057569806e39"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "5c3f6572a3a3cb586ca4eddb851763b7"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "b2a5783530a93e67db97a33e6efc1aa5"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "605106a9413f06ae77038f8850664a8a"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a2deaec917ab6290abf5a4200336c4fa"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "ab3db4aa0ce5dbc0076822b4b7430203"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "785c16dc10915917db0f11f98fab31a3"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "770f7b246fa261d0ab25ed04e0dcd9cd"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "4e98cfbfe92a4889d6ef80ff8d588db4"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "5e0a1e0f312ece9977a5fb9a5fc8a2d1"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ea017d9ce4aeca4327c27e3f89ce0970"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e1c24a32e6d040b0fd01931908323362"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "232149574881126d58354234151d54d1"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "6366383a7c3a1564984e00555df4a09f"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2fcca49e6746981b39cd89c5b19ae879"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "deed1cdaae4816729a59f90fba153a54"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "485f0fcee95450538a8baa8362c89f9c"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "abfc1fcd3f4d01900211ceeb55de8791"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fd90979265aaf71410c30d68fff77514"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "977eb21fd7ce0391992abfa65a666099"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "3169d0823914d82441ec8b52bd1725f2"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "545a1ad19021d142795d0fdb16cc60b5"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "ad325ee6b4466c81d85123acbaba89a1"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "ec5b1931053761ec3565f5f0b47f95be"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8bb7483fbe69171d6cf9783c344f9276"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "71527b0309e84905a8a3420f1520d70b"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "535a81edefae097a15f17e7fd9956240"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9993e6fa158d789db4a691c9665e47ca"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d8b693438e5546aa06ae64c2e67555ea"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "827d16ceb3913c4a93978258cddca4b3"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "6575a55e0b3f051baf193f1a0ec1ca34"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "55016c532e34e116527989344871e231"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6a7a81d7fb10a2696dc5c7e46510da07"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e1b4d5336970b23269b1a02a4306a7f8"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "a12274d21f49e67a4fbafad0bde137dd"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "399a269c760a74a8bdb08f24f3496e82"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "df1aa28760dde0a75641de7a88a34e0b"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "fc381f971d3f49dd82cd4fb9455fed83"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "1f0eb9a3c929844f4cd84acdaa7d9884"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0906337f5512935d09d2576fcc623791"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "098f13aeb71692a7fe5224b3b070875f"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8ba1e11a997c28f94c9219afea1b6d30"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f2623a73517248b8466c30c9bd628a9f"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "e51081f1fe6f23101acdfa30dcd8d2fd"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "f82854074955dfbbdef8e552427b5d05"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "6f541868776b97104ff56c970ff93b36"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f4f5aabb367f8e3a4a989942c5da7eb2"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a0c45e05833a11d9652eb3a6df08a546"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "0599476a93c11341173daa4816a97284"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "65085a432dc011f81bc068162e9820ed"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "66952007d8c80bda688f680f37b024de"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "c4bf6d12614a5f31e524cf84fb5f8a3e"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4cd786e17594639a03d7492bc72432e1"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b2ceb5fb1751fad8bec17a3d8d7152e4"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "15ddf7ee916f2b196ab0d745910d2a04"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2544f0a22171cd805241d97f99ce668d"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "73fd31a9180c705217723bb8cec91b41"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "8515cf89358b416a19732d3138bfb164"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "6bca4e99c40175904d701c76331c46c2"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "dc010e83779f200831221f5ef812c5c0"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "445813642552dc78c5f2b6ec0254ecb7"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "538e3ed8b498158b7edf4de549d96872"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fdf605f3147b26361ae7bf78fb968804"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d52bd3edce8d11912571dd5fec887e33"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "a8a0028fd2c530fae0633f340e034983"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "adb1d9a5cd54eec93d01c868845e80c0"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5431c67bf8fcb85494acd7e5cbd26c5a"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "834df5807b9156f71ed3de34347dee8c"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 21045248,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
}
],
"md5sum": "571f3f72d78c60690cfb82a936825642"
}
]
}