{ "metadata": { "ParamSize": 285, "ParamBytes": 2029049856.0, "BitsPerParam": 4.500558312813599 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "9e03a6cafc1c0d9458e66ff754035d5e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "9e03a6cafc1c0d9458e66ff754035d5e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 24625152, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24625152, "byteOffset": 0 } ], "md5sum": "14c368df6c92958cc8443f91912a123a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 24631296, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24625152, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 24625152 } ], "md5sum": "a2595d933596611b75e12fdcf32edef0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "524276bb125ba33c0ad96fcd4edb5aa7" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "58e4b522b3c679ae1af5d8f8d8db96dc" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2ca0a8672765627fcf23fb4eb061a39c" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "93b8358bd579d6da1838f88cdf561164" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9d859452d588c9f95feec156d8001938" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "d648d8776f5a657716c5c32341ac74b2" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5962ebc045366b2fd859d4c05e6ae786" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "e6de378e45a871ecbf13782484307ae4" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ddeb1edbb4fe56d4054068c4129f9e0b" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "347fecb0ce62bd15afc0985bb4ab08f0" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "924e38867b70718f03d1c604514508dc" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "b6d647d4ad1fa816fbcac6d6a01d861e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6f443ac94bc32ff2837aab94b0f81df3" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "a07facad40cc3badd040b84bd5b141dd" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "70f8454f43f9b9f5f2d22b934f834042" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "98378c057a0f436e2c280988dccb06c6" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "455112a713a168c49a9f6cf9a0ca0b5f" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "97affc9aeefb831d6684f4f5aa88fd73" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0246a0cc338b719d3fc53ad5f16c5a64" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "2a06b751144269cbcebb4a3b6803462e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "97aa11fcae840c2e2c3a8eb3dc8e736a" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "10911145b3a7ed54f9d5241733ad74f0" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "54d1a815980f21e4063b35ff48ffc24a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "7eb025e0d89aafc5598dbea524ccc072" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "229a9ce33709e3b5c0fbe2b3f226b9d4" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8f5db22b9a12886aa9bd1b7744bbfdc7" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 31463424, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 } ], "md5sum": "b6ad83133421b8cacc818d58cc79825b" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "abca7ef61fd7e810c5261f2a5b6ac698" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 31463424, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3145728 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11010048 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11993088 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16711680 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17307648 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29890560 } ], "md5sum": "4576737e1d31d94e5c8559ed2124bde8" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3d9d3d36dcc869f46ba11a543135d7a1" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "f06e5f397823cc9a8140590c07b839dc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8ba23cc4aa73375f9346fee5e331ceef" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "1df4ad139944b055aec372c08d01b0ea" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8776ddf30cb9fd208425807750c7e686" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "4341d51059a1e7512a2fced683a06d16" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "23a38ce8dfe90c87b650edd93e525905" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "dcae5669fd4b1891b4f14c1848aff90e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "926d14e44e94ad24eb5bc2cb7e591e90" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "92a35e65662c01ce92ca327b2fce2740" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dd75587b38446b82dc28ab17bc0f89a6" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "6e25975ae10229e81fc2ed7040636205" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31481856, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31469568 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31475712 } ], "md5sum": "e613b279ae08813fadc8c2edddcc251a" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a6d6f399a68c69283d8f99e1159b2fe8" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "2b46cd9a938d4fe29dff35e83b224e81" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "097b617d3492d0ece72baf596f0066df" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "0202aba9e3ef2a95ea37829896516aa4" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "519f9800da839428537f318c661720f8" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "a8e976fc43c4fc21e85f8678a9ee2619" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2c358f52d88ec689ce319b16773c4d7d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "513398de82fe6892f4c8596104980420" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "19fa4a66d93359e8c9c75a6c69988f2c" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "3fd8861702c0fa7dbd181a375d68ff06" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e55f44c8e5f4d62bb8078f667e2e3914" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "17313698812caa04826f162a3b6ba7d7" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "186440a3b9f7b8e79bc07f20a54db807" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3072, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 16384, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 5120, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3072, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "9e012ddb73535c65fafff79ab8d7ac35" } ] }