{ "metadata": { "ParamSize": 805, "ParamBytes": 38801408000.0, "BitsPerParam": 4.353045149919394 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131137536, "records": [ { "name": "lm_head.q_weight", "shape": [ 32016, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131137536, "byteOffset": 0 } ], "md5sum": "1a6aa930cf6ebcbf431eb8979007e3a0" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c83377dbb465f85be41fcc9103f72994" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0044ff0ed1cbe01c2cca0b52d93f09fa" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0c23d8490f56951073bb5dac2c8ade4b" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3e5c7bef0802c6406fe2dc086df00dc3" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31105024, "records": [ { "name": "lm_head.q_scale", "shape": [ 32016, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16392192, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16392192 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 16408576 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31088640 } ], "md5sum": "16f6f30c6e3b730f2030f9096d53b555" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "56ba4bbbf57348d81f879dc7734ef732" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6c6fa04c463b30220ff47c4d75355884" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f5ba9f80877ce021f44bc0b08f8f2c94" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c9154d5eb5a3294b5c9fe8ea5cec36af" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b8eb8b2d667d3d32a7da7f668537218" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 131137536, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32016, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131137536, "byteOffset": 0 } ], "md5sum": "a1ffee69145fdea11ea3d3b0f6a3115f" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 18907136 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "f371609de565c93e1a65ed030b39facd" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c28988017aac833acaf830dfdec3bcca" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2def2012579073ceb5b66e96499f8bf5" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7c3fc2d96d6f6619fe1a2548e7c59caf" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2ff2d42a246f0bd68ebb997c65e15405" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31105024, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32016, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16392192, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16392192 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 16408576 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31088640 } ], "md5sum": "0f87643f4b37693aded439e869404f8a" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "07aab81f49040e77db5730395d51de06" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f1db8811a9523f83ecd9202b8787113d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c87ce13e738a5c89e7e3a51c25812a7a" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e5dbcdb9179e69ff6d4c8dd59abe8757" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0d17c31196ac186b485c0f7af77bd7e3" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a29ef83550710b885227e26f7324370e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "04a8b4f9a6cf67cce3ae56862e4be2aa" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eba46fee73614503c290800a988efcb2" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e9eba56282e5f54a887235d1304c3deb" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "74a64130df125fe9e939f8b58587f5b7" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9eb953adb9444b86fa98df2d4fe82055" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2f17d41196a5ad03340b7ee01d50c151" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4c9698b80e34933735238c766e013e4f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6cae38a913c420fa6902f98c3c1adfb9" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3d0c6b0899d2add784460b4d2886052e" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28327936 } ], "md5sum": "c6dc82bc0daace449d5fa4fcddab6d85" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b47486ee9b750f5e7583b7e78a84e1fc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "71484c9c05567bba0fea0c08dd5d801d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "07d19fd7878ff7d2a4cc1d184aa15d75" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fa9bcd8d47f0a18d5a4031b18522ef70" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2041e1fa3f778b5ed0e58e3d0ef8a297" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "07531aa18a6baee26af60db7e86fe511" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "83b3e8d1754d1e7db339d82d23415443" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ac7bab1ead91f651effaf6b4b05c1d3c" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8698873e6bfbecae68a46640ddb3dc3e" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "448981afdf61f0310a6a9cda32253764" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "82eacc5f0039f0e2d1efc38581d58095" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c19c9b455550bd728aa94b7bbd0dbc0e" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "5354fa5d54591690edd27f875048a0a0" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "987177880edc326bd5d76ae0e7828f7d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "28163b15c26e56fb850a8273bfd4af19" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "eae6d00fd6a46f1833005ebe0ac5cdf9" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "91f8f70a3ced772194defbef25e412a4" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d7b4b166ca59d309ec0999657c076820" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "345602dc2ddcb80eb9584a275ad03a7d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ddb1ed1e09e5c622cecf307859dc3bbc" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "52ba37eacc371f5f3ab50c1bba4b0a66" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a4592e89ba56556309306d82bf23927b" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "597655d872f8760a1a3a19ab18adbdf1" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "63953849148abeef6485e6cfd4f025eb" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fdff4a2f85dcb9573ff4bb21b7c98838" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7a684c107599b0de32721334b44d2fd8" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "775fd033f0f021225774eb82da97eab5" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "fe6006f175e4da5abc79d22569e44733" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d2293dbde4cdd38dacd85f7b482c81e5" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5273d4b29b7ed5be589418ca96f3a3e9" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "11e5b64045be506d713015443f0927d2" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c8a395308fbe111a541b376a06f91245" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ecb6bb5e91a6a78abf274c3cc19d823b" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5d7ffa12296886ba92be1360765538f3" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e7714167a35e720da80995795ee7d63a" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "390145ff7f4331f9e80fdfaf573c285a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "dd0ec4352d345e2aa41d9391690c724c" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "87e9af0ce3dde1906843a5d3b03ad024" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f434779633839ab77e4c4e21228aba72" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e8a51a0e4563ec88fdf5906cc0e1be8a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "303f4d64612f14ec753989e1bb70925f" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e796b3d3e3abf6f34117f24e5619899d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5d7e955f4a4b5269db890a21844a3c0f" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "32c8505feda05f73caea1743ec69337b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fa595ad42c8795c18bd3ced3b5420768" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "c77714b596e4276b39ae0c25714f728f" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6132727ae14038a95c3742a43868da56" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "501d3cba3a3c5b719868bedd800d78d8" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "9580554298dc5fe16cc95cbf1df2a6d9" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "857fea31d804132d881ef4e23ab6b7ce" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ccbccac47ee785bb6c4a795ccb480a97" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5f892550295fe1950bc05e448e0e63b8" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0c2f0e7acbdf9cca1f6723f8887f1702" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "25663a56433ee7d22c0c041dc050e1fc" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "070528c8c064222b93c5c23508ab6c95" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "11d0334b8aa634f66e9baca14c3f40b6" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7b1c30c604ce80609dc11203beb51477" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9167e5b33d1bb88b61661932916a5b8" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "de5191b36144a50cb63ec780609e4d85" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "36c27a2acb406f4e28d42913de799ac6" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d57b397d30b00c1fcfa3b0fdaf0ed5e7" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "5537793bd96629fb7abf8d07534c295f" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0222571d87cf2fd230089ca9474fe6d0" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e1ac3555618a8d8e5ae8128aa2f31ec5" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d474725ab330c214a0e85c2edbd6ffbb" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6d57c156530d3481f21cef165cb0b1f9" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "67377f493fd1770f41a9373623eabe9f" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5c43c3c16a44efb4db2e4abb5c86c832" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "21003f63ec81b8285e05fcc5041cde36" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c07c30f35123b02f233df19679910691" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b626f9362254db3cfe0018e1f30652e4" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7bb1ccf0257b2633879c2792f4118ba8" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "6d69bfe69081d2dc13ea46c302448011" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "05d4489954545d889c4c86ca904790d9" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f676820e044153c1456acdcf86aa5355" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "24af9e9c63018fdfb50ff7600130425c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "76e6e2dfb6b1ac5a9547511aa4fbff2e" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2b7af6591dc0a5688053b702c34046d7" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "341b86372d497394a9f7a9f71be2b008" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "69fa375ed7dec263740d5e40c15529ff" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "be98b15ba292bb71b7b3535a9b5eaf11" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "57cbb266316e5693eb77179a2f3edd7b" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ed3f2b36c06fd4f24bbe8d8533c9d91e" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "26b9efb4a76be03d415301415a6b547f" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "4306308ee69f8c392d56b9bf04d3c453" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "49ac0acb33c98d33bd6bb5032f44e741" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bd37afff60779fa19ad9c934f48702b4" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bc64ad4974f32bdc93de209f7f144b5a" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0bfe09ccfb07c84bd9b0956ce727759e" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "67fff70069dedcffe78b5d44537f94fa" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "367cb9e8896046fe80fe403964c12497" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "c23de6ca617916783e44f7771e1f36ba" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3aa8b1f71181ce21f6e8bcd746b3a8b0" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d6529ea6c5d697d7f8adcc837fb9f0cb" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4d30e70878aeddcfbfda7b65d0b1d676" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f541e0a4984fc3ed5fda41b47bb5c7db" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8edcfa8a7392b5cc7ea20e2a545a49f7" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "536752e4da6371bffd5660d5bb644a3c" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "86c7f11ddd3cc7d041c92ea8a4388e29" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32555008, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 9437184 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13631488 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 13647872 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28327936 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 28344320 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32538624 } ], "md5sum": "b185c36e876abf8d43ba84ab21a749d5" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f75acfb2d024bcd49c2fef2bbc2311fa" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8976c9cd5040da32798b81e4b4eb627a" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "54b7576c620045de02cb8aac3e404466" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "27d934a9e972a7004d35d73b048ac4d2" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "49fc35b4c206708544308593653efabf" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "06f5687e1cf5eefbf2699c1eaa8b9276" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f249c64d211b1ac4cd4f293b8fc8408f" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a033fa0dd62add7ba1485ae194256ecf" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "277c20844790aaac8add14456bb05106" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "36332aef298ef172a705d26cc8715fdf" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f35a3a06f6eba989cfab50ef42255937" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "40bff02f2be9cb52dc303cc990eeff5f" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "14d0106952f0a56de30e0bbd603b83c8" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b45596c2e71c2e8b6efc5e0b645bc6f5" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "725947b77bcc00ad389e5ffd50801081" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d1dd59b36c8ba56d35e2d094f8aee23c" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b89dc40248ed5853cfb078bed6b9e440" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "14f82e61ef0758345eb08b6c30587a94" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e9ed243d184aec8c9404760685b54089" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4f76f0507eab8c688cf6cf168ea968e1" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8767a9c9520ae8e0d798041f5cb13f50" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "700f9941948798aff5b432e75e006888" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ebd4033c03614ac28bf916f7674a7d9a" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3003f1e2ec2687225b6816fd508f5855" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cb41558d67c5024cdc9be97d9fcb8f80" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e1b1f7db8c9f35da6ce9f8a911f3d35" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "f8f1e69aa6b0f786d01a7b84f30eb974" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cacfb44bad0add2862460b43d26ea82d" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c0175b07abb6f8b526eaa33039b5429e" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "29e2338d72007745f6cc104fa8579471" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "17b931a7e1438b05b095c02db37293d7" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dfdd893b9a823fcbac0dc4a7d853740b" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4c6ca4c8e870590d7c38c47056eb365c" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "149cabc42a875aa800fd38fca3e21add" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "88f427d2f35f8ca028f66fa04848cdfb" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a53919802b5af8a1ea03a0a3e8d8251c" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bd344d26d2cbdf865fe5bf0e0651c8cd" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b61af61f353e73d72aa9985a00606325" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1ea0e63776d330eb7d84b6740cad62b2" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c28e65ceef640b38cc03fd2880cf1bc6" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e63c6c65c708ace0a4fcc743f9781735" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9be64e7c087be6bb83b1d5c5faa16e33" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1fca6a9a7c0c1b30cf3f49acdd9b83d2" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "86378867990096681db9b22592990db6" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "b6d89b7acf7fe40740fc4c41d150ff30" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4ec001110d7bb3643599239027127384" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4e2ca80db2b536d0240926868d3ac5c6" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "e28d41eac609024b38febcb896918a35" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ed99ded14cf3d0244e81dacf98538338" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "34ddcf9472bc2ce80eeb8dfeaa4a5710" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "58d76f24bd09e31adf5b3b91d7403a8d" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4b2a3e585b57ae69e35690190169cef3" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2b14f28d6979ec556c8fa53296cbf880" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a9e0883a01dcae816ec3a08adbc4121f" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e715e5580b58ec13a18af2854a97f0c7" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "eb46bd17627c9727c515f8cc5aec35c5" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "db4ce43bb453ca0885ce0bebcc6be4f4" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e20a7548265f9df2089822050aac57b" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5c3cb77a4c93798f9a52717634ea6b46" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cec4129750a5351578274f51d28327b0" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "67fa90f0846f4dff7eec1a6a764d967a" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "237ba86411fe53a5e0e4505569dc8632" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a067ca99d9d64d592cb004320689f82c" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "929ec4eaaa6cc4d6e5ef562225cfd4a1" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "72195e1da7dac3b3895f65e1473839c9" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "3916034214a41606dbeb3810f0f516ed" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ad4753f3aa3a541aee93b4a70e9ae84e" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d038b53bfca4e1ca169562ca365115fa" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c818034a5a5231a2041aac152a3d8577" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a82d3434355919bb2a2548e46713d165" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3623d07b8fbd19cc9a1dd41b758255bb" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "ca2add840782c9a90d2bb410e3284ded" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0d473927b5c7afb44bebef536b849827" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "df4bb163f5119baeae1fe79c9ecaf457" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c4e3e93daa8cf8cda7ea8196f6b166d1" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9dd78e90b1353484431b915a6e0e3c71" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2b779651aad56210a6f7f96b71b36d13" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9fd4c160f64bea5ac7b803dabb969c5a" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "e3bec96e5ae9454477d1d8040662b8e9" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4974382710221d9beac8952340ee1436" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ac0807aff2a7f92d772797025da86af5" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8cd4a22782e99e1e083efb1cdc5a98da" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dae168328f84fe20e137c9b5eefe8a29" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "eb12da91ebbbc37c0e001cf0a52e3d0f" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5e3d7f72d6735aa24ddca6ecb13e2d62" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "02b8225bb8ead27f0d86a07efdc9ffec" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f859eda6343cfdaee8da10d3abfa6f1" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f9b0bb307d4d8f67896e9eef13fa97d0" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "eae639080c58e50fffce3f5b9ce7ac75" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "785de765a97b5ad1fde22a2e9ff139ee" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "804e7241fb5f7cd95acecd7f8cf84505" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "69ea2515ad42524ea0f6d0b689aa29f5" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6a09d87fd61e7bc37c2c170a4c682f66" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dd45f20e963f6ca324381a32410e654e" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d869ec996ddeafeb4c6e1ad5cd3f4354" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "87d72c563e3a8704c0086e824bd13196" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4b0f792ffa910ac05225d55161a64631" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e0f4d2a8cf3d4b740e79c57a2fba94e5" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e18f743db30de8d885ba3d54e5dbc544" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2ad1e06047f22963e8e7bb3eb9135902" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "44ce8e993d73dacb7e529995d8929f4f" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "c4c7e81881778f4b9ddf1e4fab238e07" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "11d165984d405c8895cd88ff2237bd90" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "002db14e799ed01d6a9951326b4b2f19" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "49e395d4971639f3deb49e8a4568ac7c" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8f65359127994fa21f57943a943de4cb" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f728aed0d7b50a5f28ac73e3b0575be9" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "793330d0ae186dd415b58f6413a01c4d" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fe0d4b97041b4791539315baef8bfab1" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "40ec6174a8480043f883b2b3e313d3b7" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "4e0c705dd5ecfcd03dd936a915d0dc66" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dc651df7ebc7a5899fee57549c288df0" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "afb7391a5290b93c3db199506b960710" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "36ffcb9430b193838d73b95c6561eb94" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4f6c9e82232c60d0e282c54ac3a1959a" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ed7ef2c2a22f756e9db5a5e70f9bb85e" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ed512284421fb9773c7450c64f0c35f0" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b6c806a3a15a07f1728fa66d43cbd97c" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "82d146ca4c5edaeabe1f7399e9a157cc" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "4483249491dfa74502a6a2208f130d6d" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bd6e8300635ceaed9c6fc1d9112712fa" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "89695c2996b8d08a56e2954eb5b4ef63" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "029cbd32ee7c02b538cfa2e741c3eee7" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "552c739e645e2a4fb30a70d692737dbd" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "526e3105099cc848639ecef186235b5d" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "35403d5da7e20c5ed357c4fe433e1c46" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1d2e3208d0fc9ce36006604553fd3f5c" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "69bf496720a9701e1177c807789c3e9d" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "baf73c3e828446ea3d75479abc470a96" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "698987035084a1d3806e73d01fa08c26" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "60f092a7586bf96ef04d412c54a015b1" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "5296e185c6f5c9a8ca897653c2e28bb4" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dd755a3f48564e5f5ad1b2dde38178d5" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6d36b10401a84370dd93d9729a076aa1" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d0600d81e390a7a3a221ceb406ae5ec2" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6f08047e06699c5abb5f8f504504634e" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d41899b6bf0053a7c5213ae185cf4922" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "bf92671d3cfe7b10bc94d04265dc2b1f" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "63332bce298c37cf8292e8dc32d32a76" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f9ff07bda44a4301df01791d3d5b1c26" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5b43df4e563d65f919a5b1966d9dc4ad" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5d71d5938da1c71eee0df0bb4e731056" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ac7bde4c775e27b059efd26a307cabf3" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "037ac8e41160c1995245de416255c03a" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "5d7c791ba6c06111d99ea45e9a24b14c" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9cbfeac6d49cc25cdf24f9d498715b07" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8fcf03b2986ebb6da4dd81f09d4a9651" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8436d1bfd0e0ae947c602bdff2b282a7" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "321e15d2709f17730c5cad0483e42492" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "4ac2f33a9620760b285d03ae6b1ebce5" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3e1b9f00973c5a78f7e0a4c7fa5af73c" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8e4624843314e5d6f0270bdc438f990d" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fe1c4047d2cdade601d1c0b49ba3d3c4" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6cafd54b905995d898eef8103a186776" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3fe82ee8d51b322b244a85adbe521beb" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "aeb1d66ebe0c2524ec050e9dbb1bb61f" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1f6c1f1265fab4a5685c727b4f31b20b" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d842b7ed60c5a849f1088d3678a023c4" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8fee3beebbeabfa1fbd8fd5bdf1aa91c" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3404f55c49d403e06aee99bef7d13244" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9fc13e3fd7279c632e644bb724d08970" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0e73a4684ec5937a52f26fddb92d19af" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "1fcd8f3d5a5299144170124ac4ed8d7c" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0c8ad2b846b2d912f367e7d4168b4c25" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "45f2443fc55511b94231a5d0cee6ae90" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2afcb09b7b2820e3feaeba6d562b2cc8" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "203e381525aa74e7cf25dca337eb1216" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ad74dc7730f5320ee34433abe273e743" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "968903d555ac66818a1f67a34f3cb93e" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 29409280, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 14712832 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29392896 } ], "md5sum": "5cce6eede08ac8098860e4d679aa2d85" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3049f9e84d7fab41561b4af0bb5a00da" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5d84f6c2dfbca41ce5e7355be656b927" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d715e60b3fdd63068c6222fa1e52f1b1" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1ceff429826f29524653c4e9ce1f5d93" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "751c65072f0d9686b12327e3972302ed" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "14cc052ad9bed477ba63e15b29b30790" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "e4375dd1295f7ab383f851d21236005d" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "401ad77b3d628b88c78e1f0f8db91a01" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d2bbe605ea7ace155205b823ee59ccdb" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cfb7fc2f5b674aeddbaf23a0d26cc240" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "44af979c20008e1784ba4eafe0297ad4" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "31997e38bdb2a08972fc59f42fab0e28" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "df5e9aacb6a647f25d7108935f7a437d" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 28360704, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4194304 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9453568 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24150016 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28344320 } ], "md5sum": "aab2e35fd2ce81e01fd313b507b9bb92" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8f6f05e20458f750c8c16e5e359eb5d7" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e4de48a9f5be393d7ee8fd7252ed7c1e" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dd66d1571b46f00577bae7178e753275" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "69bf75f050a90241e703ce49d2b90c73" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1c769bc461796e2bf4179cab040cc38c" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "3a7a8f74ea3a8e6b538d7d44d2143745" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "67014a38b11ca8effb6511d94eefa21a" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0e05ff59e562f2856858e54d5ad5b4c5" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d726f5c20c12a5bb85ea428218a352ce" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7f8c7acd55748fcdcd81dd9a5c584541" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "23d4fae816b3b48e898f0c83bded579d" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "a0ae8944935a11d833a44578456accf7" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ac65f4d6fa1b4d11b797147bdd88f15d" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3ad4d8970ae045ad51ff643f4f1ed41e" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ef9bd631745da21d68b2d32a39a1da36" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f3f5b71786144f7abba659241d02f70b" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e8a0a19194e6fc04d298db1e9f9f8a94" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "025d825b9bcaf5c85f71f9b671ba5571" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "44e3d1ec8093e9375f0cf5fbb952e6d6" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ec789c4e369728f87d8759dd4256544f" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8a3dfa9d77867781f68a584e92add1f0" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2be83cdba8b75839d3cfa0262fa91954" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c335e04a4c762d84cc04b157c8ba854d" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "391ec43b28f08ca9756a8941ef511eba" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5c493dba8205393ac79d0977c1b00662" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "563c087bd66dc9f478333c1a312e851f" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "16970b5cba856865d3a8acb1928a3ab2" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9239aa5afb9b0eac3d3d9d34d02cd96f" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9f992a94f21827f05cdf462d7a10c294" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "3fe34c6a1dae1f4e99d7cc1cd4d7e259" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c0b5993243712394ccded88e3a941ba5" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f535e83dd504bc9c1a914f32e254b2d1" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b251c437273c61600fce62f5cbb6180b" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "02153f294c210a32415635da335fe879" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7324a29ad5aeb0c0592521c99a5907a1" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "e6ac2310651aa3f3ea0310ec15ecf1a9" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e42807bb0c424fe113646b010cc82a04" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c4317659f75af8c99293daac3fcee077" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2aef841451b7c9064f7e75ef6a323b06" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "97d01fca90f60977e8757578a08a540a" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0a2c2a65fc80963f6fd5d6500c1fbef2" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4eb734f674ecd57da7e1e97d432e1a3b" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "17923ecc7113a037e2f8120c715ab053" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "10bd5bc2311a13488bfc0d466fe44851" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "bfe701a46ffb040735cc4dc3b0e56622" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "be40d50fd133199741cc76158a9bac3a" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5b3ac56d1151f261f8b13de50c3a1bb0" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "ab29a70e495e0f10773bdc77af9e5050" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a855102967e2e7c675b9f01b61113901" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "89d1b518c12bda78ed6546b94926147b" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a8c0d5c1399554c15044912964dc0837" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9f7ff5de5bf7b1dbbc0f646caa8c09b7" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "00824b79a1fed1107efb31a310caab9a" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "dedde11ca27027250a95aa8889318f0f" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f6c207a84cf2f3c2495f40d38bb37a25" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "56df80ff7010d09568beccc0698e91f6" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "592d51e1e0b4f2d8d045236afc3e20d4" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "08ee93c52fa70d7ecc39e35f3f4b1a97" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b5ab28541f4b5729077715d74c9c192c" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5e2273978f7d4c0e6e3dd14d8659b6c1" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "6601f32be9f1f561d5132f8031b6be4e" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "49bce736287c706f1f52cbe11cdf02ca" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a16e05e247eff8fa6084bd20365b3176" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b8bb7083172e3140101b9d95a909f61e" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "47e71dac8e3b417b7574ffff8ee3c18b" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "be1e23e98295136b36ac5a6b85b8e866" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "12d76e08129de7537386259649e64f89" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1c5dc07c427a91d3a78c3d7a431d3f1e" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aca71a88e3bc80c37a64ba46a73645b7" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fda39b594ef381ea9213d81582d8a3dd" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "00703194b62c8fbeaebe882076a64c81" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "09d349aaf6384238b351105203616fe0" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4066af21ae8f777e2a0e9d8764cf9518" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "32ce691084d99ec7cb90d4db6ef003dc" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f513ec7cf278056c8a4a570c531f1216" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b413d77113869b7b399542ba89e63d29" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "394a0d80e62f83b63b74955a0d65d12a" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ad00d28412f886c1e2ee659cd2ef4b53" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 29376512 } ], "md5sum": "4f1c848224278131f60bbe55279ed984" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4b864f11f603dbd5944c5afd160ff3fa" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "046e4e1ef2da759c6fa5874e65278046" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "58e50ee84ea2d521191fb33bb44f4534" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "050fb63dad1abd8c96b9389306a1253c" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 18907136, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 14696448 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 } ], "md5sum": "442d6303990f7d6f7e08181446492c74" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7eac88442b1c7dbf7c9b01417336fe36" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d68a604a8a7cde720248158baaeb5fb3" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e9933434bf97e5071e7c92dbbf9fa425" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8d6a2a8330c36f34a5e9d5056e3ca3ed" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "65164fd163c577e7cb446e3c7de970f5" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "2973295a8c9c1b5e503b1a8b39a89df6" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "06c773083be5836c24028d53ab3df8ad" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "54a15fb267a40c0349303ac2312cd177" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "805a5093578f48b31b8707a68fa0e37e" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8d2297f25d5b8c5df5a502d188acc8e7" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "438dc3d418fa5e96388dfdef75bca6cb" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "69de340841e2e55c2eba5b50e2e2a886" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9aa59f2ee05737e852b05747432f1032" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6db42f1d4cc23bfcf938ec1d8d51ffdd" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "baf5a1ada5b29bbe749973dca2fed8e5" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "023f811a1e4b0efdf5db89c765650395" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4ce9c2cf06b43af76116d999c80fcdc6" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "1f5ac9ecd224b54d29131dc2e061af22" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "15840634201b8a6a82d5f331d22bafbc" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "898fc1d61c94635ef743f783222c2e8f" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "72555825804ddf9cf5d3a2168e35bdd1" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2171e0ab9b457d1dfbca0e13da776615" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "534e288c8e3962d9f6323a4b18683a22" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "eccd768d0444cd1e2c95eab5601eb66e" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b732286242dc99cbe6fa0dda1805f3ab" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b116087121f8ab62155c9847f410b39b" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "8f01a6bb9dd0f24f01d58d4f10429ffb" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "44601f73c5e83e0bd42d0b5915d0961a" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2433b56edecdd2e6309d6e5e99ca6f93" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "a986fbe9571ba1ceecbb064a5bc74cc9" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cc702c5d495bcd4b4c6210feebde87c3" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "050efb2f5488d53e66ae3ae4fffcbb33" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "535a3e62db2069aaf45cb50cb42368f3" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "66798ab694aace297c1652f088092b25" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "119e9d92bff3619213b632e5512af112" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "db236da1e37c32527ec46d01ab4ba71b" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "11262325d55586ae6249433900bc83b0" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6bacd5203e9f4fc0d13cd120955fae08" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0b8bb948b2cbd88975b57ea8a3730a58" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8d9eba94aa293dcad09f4a24b06cf5e3" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2a4b74532497a69a075db74a6acfccc0" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6cbc77b117c6f866a7d0fcbe372afa0b" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a4ba91f4810331811117225cf9a6b4bc" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f26b2d0d6c5cf8a4e6b1f8b003c91c63" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "b82d348656f206b66446d310772a2ebc" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e50eab15ee0e68df2cbd81153bb8a29a" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c7341f0a4372ce0c59b2bcbcb4ee199f" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "12864c7cec02d2f9943895c4f4b2bc6a" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b5da6eb5a4f216cdedf1417f733be53d" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c8bc856c4884d26d5c9f433ef458beeb" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f4a0dcb269e964cc5c29553c13a0e8e" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "47778d8afd2b6befc1ed006f9de89cb8" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "154314e431ca05c572fa66a601cfaf47" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "d3bf76f79425bed262c2583c112e8d6a" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "82dbca69ad912eac81047bab63a746d1" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8641e16be2a7a7e37b3f251d24c0bc34" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9954a191167b16593d9f172470e73b48" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "999f6611a8bd12c4d5d8cd6711f0b4c1" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a19d02c18221d9318b73419b081bf9e6" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e722ffbd7d016ac8ae7e1a30a96b4835" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "2ea4ebc654f82b4fa524860cee4c0a61" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9c1b671ddd666311070c360a4579f7fc" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b28874631b6dd345b47414ef71c995e5" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9742b71abc6a452de69cc2c21895a1a8" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c054ae03a25dcc11cc8778796eec97f7" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 18923520, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4210688 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18907136 } ], "md5sum": "ca7a1d0cd7793de7c59e169cc5ba5b31" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d365c4cd3b22eede366c02bbec0a9e86" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2c10aa1530f61fd7b03d638d98b1fbd6" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ba5f2d95dbddfc2f526bb880e57dde3e" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "573d5a90189faad6bb6e0e4608d068d0" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "07647877f6c7a58586a8a5389f447b80" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 } ], "md5sum": "69f111a81530e455587a096a935a8c36" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4d0a3f671cdc776386eb9cecc62ea5d9" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "882e0e154f1281f339de65507e3e28e2" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4c9936aacfccc30727c4a5bd2526ef89" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "259cf83c8338b1a5d768bd9474b36711" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "40b55660002bb8c2df6a584cd5f24604" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14696448 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19939328 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24133632 } ], "md5sum": "5e2ff9755c18a4644cfa378d01bd3128" } ] }