{ "metadata": { "ParamSize": 405, "ParamBytes": 6889973760.0, "BitsPerParam": 3.749252192749517 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 335544320, "records": [ { "name": "lm_head.q_weight", "shape": [ 131072, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335544320, "byteOffset": 0 } ], "md5sum": "0d9643d03376592419461bdef2b6c785" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "lm_head.q_scale", "shape": [ 131072, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8bfc072df27f6c75636704b717388382" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "892cb1814798985288301bea72e959db" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "44d5c806684468de000cc36ffeaa64ba" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "ad6d5108d5955bffe499c6268ed352e9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "cc3bc797c7bf9c744f1c074ac23f84a4" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27566080, "records": [ { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 10240 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4597760 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13772800 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13783040 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 13793280 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 18380800 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27555840 } ], "md5sum": "d030560b997cc8ea1bcf8f3ea7cbf9ce" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "8a99cd1708eac58bb0183dffcc8b4662" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "757d2cb6823f875e8cb9aaf77869906c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e16990cd7b17d3207b09a4f14c3336be" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "a516ad4df914e15c41e5137c97293976" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "a50f971f3127ca103545e77a72747ef6" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b757abc0e5b061363f332e7296963500" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "2c04539a874c344b534c13da3d37041d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "a6367e35062996212bb524b51dc1a84e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "ef643486d3c07e81e1f1fc2ffe1a28f1" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "28218bcaede1acd6112e00f3421a4b98" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "c04509c17d8ec45fc02875a30832489a" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "6d96134e6d2a11fd2e799ae6aa5f1c29" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "174b5fde1d5672e59767d440f77bd9a8" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "294b806b9f8bc8d47cd3e19324b2a6bf" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "908e26c0956a03d2141bed5af5fbaab8" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "3505498efddd6444e1ed37c0fceb8d37" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a7e97a5a1830c38aa3ff74e01160dc34" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "0affc9e14468cffb7cff769fb8b4eaae" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 335544320, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 131072, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335544320, "byteOffset": 0 } ], "md5sum": "3c15656f0ded974b83ebcad68c1f4619" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 131072, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6c32d4392a8865376dd946da79a7224e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "8baa5bf4e4ccad298605b1db121324ba" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "32ae242adaa01515f0ea9e313b40ffee" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25589760, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11806720 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11816960 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16404480 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25579520 } ], "md5sum": "8b5e39a641c983fccb5e7d4bf9d09a03" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "f3c837cb58076b1c02066f4a6037637f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "d947da27228793b58c6bd4c2eb0b2e82" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "005e5d2c5bcac5915457e6dedcd538bb" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "38d70e35673446bab3a44b54c2aa40ac" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "ac544895c4bb078e98b18cbbf6784cd8" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d24dd7dd9405916eefdfa84cee93b1cc" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "8ddfd607078c8d857af5501daae0e994" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "6f8274e5c04357cf822ba7279dbf03f9" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "5d9b5a264804ba0164514a31f760e65b" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "24f10be03f42bc6963e8531cabfd646d" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "9c81ce10af0d25d0b6cbed31b1475884" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "7a5a5292398cb46f307573e32bb7be87" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1f04efd13fac1bbb9569f2f7555d28bb" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "72bbfdf5979e203952960a030125df8e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "b43e9899f03bcce80dd7815dec0a8460" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "79cb0e43aaf82a422501f15881e347b9" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7a9a1d16f703d84c54d4ac06ee394a27" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "1555c2f8fd5229a1e0653aed872fd217" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6d854a94e4dc4d535c494e4a21192797" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "aa75be0920e7f3ed49f40c13c0f079ac" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2f2aa2cace33adc8d44b86358e07ddc9" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "0ef5619d62c48aa31ff34a33edbfe7f8" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d069ecdd03c1ccea9f14f981af4c1894" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "4844746a888cf8654c28b8cb79973356" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "6993e7874b9ea4a1db89a8977ac4f8ca" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f0383ec44fe7c99b5be901f873920f7e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "2fc9bed0ad21eae2bd70eb7646e939cd" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "17ca0ef46466155730e4470a27cbc2e5" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "482c88b59f548772292c6c4bd73ff7f9" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5b3707ea21e9319c9b1c867b25b7d9a4" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "eb0bbbf064aee02e16da80a41aed9640" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "f47b079315c809f5c6b29a37703b6938" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5f6c917d162bc6f292d56b48ea608ac4" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "f8fed5e207d95b1e4bffbe907160cd45" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "6f1d3904c2b64ef0d338a71bd074c3a1" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "89ea0e68d2f72e0964af0b80d9aa1d40" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "16416a31bdcc0f7c4c601900e50959e6" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "ee0fdf1d515783fcab547b61826d470d" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0121934181c7321ed3d77315e4f1266b" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "63f282c85f5c695c40b9b96d085d930a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "aad0e7e6a89be4b19ad116dd65ea41cf" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "ae6133fff0a7769f1eaca7db05046aac" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2f83a86c5e2b3cab440b289ad0a9d04e" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "602e6197728c0d778226670760fdae4e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "f6e03cc613e77c0d93deebad4a7133f9" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "736959dd244aae58d8f2b042c12777f9" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "8c86436371069e76c0ad6cc2098717df" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e85a51800ace423b76f4f9add4edd31a" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "97db87702f8e6d01eb368c67beac7fa2" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "e9ab3f9778556554b726069db0f7ad62" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "942ed7be8643f51d6729fc4c44ba57da" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "26d963cc7bf45d1ab5bf112a4b57dcbe" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "9ff403d8e24023e6841c47307761d031" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "94fe0a656ea279a3b18fbc856a168322" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "56dae122c54a24e8863ecfcee1389e46" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "fb2bd6168248b05ba19e09159a768c66" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "f887cc9b3c67346744a576707df50de8" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "666ec8783b600d5d2d2673a9879b9d81" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "30fc24d9a59bcb912f8dd457128aded0" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6af3471d49cc0753b38a779aef67e213" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "42f252c06fecd1901f9e197af09f9c51" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "4239eaf4cb8232d2118a326877e0ce73" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6f46af9d221a20cca00e6af786a8c38a" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "3bd85cc04035d63829b959b7e4fe9e3f" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "73fbb81c7eebed4a417fd02fe0b01429" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "8625a209f47fa228a4f1b447034ea620" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "748eedda44a70cd10ea00ac996c6227d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "8e71fa779a68269f5cec54b73bb291b8" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "d1d1ad06369c032ce940eec3ce570f98" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ada050206a3abd73f1a5950f65be57b6" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "2ec8149c48738872fec1e475ec4e04dc" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "bdfeec76d7fee84236c5275e9a8b45ff" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "d505051fea4e3d0cd67a8b782778042e" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "dd50c3d8c8a73aeafb294d2ffe05eaa5" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "9cd679aca2d4141b50f9fc49e4a54bb6" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "0f1783c94ad3ae319f0054a12fe7830a" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "608c578278577729084e8cb3cac249b6" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "a83f1c87a799e23d4868a8313c897e4b" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "6b24808eb59993b6c5082334eec48f46" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "b66a8a98fe639739128be7733aef9ea0" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6eecd0ecf94cee23583e7bcde6a9f2e3" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "143beee6f5c9ea625d9d0f24aa93b58e" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a176f7dd8680c786e1dbc34f2bbbb586" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "31ac9d6aea404dc80df014e1a3766db4" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "f4116a0d0fe2b7e04dd3aa720187b959" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "d3bfc7e007b84829d2165f6dd124d0df" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "fae1a1e1062166aab9c15ed1683342fc" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "824f0c18eb8469240e2c535787ff352e" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "a334f4fd09e6116278b6a244239d70ed" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "3c041072ac3dfd6f43f2c0878f7b0cd9" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "6a964850da4bcab8032e179f06295b05" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c6841a25798fc2c6cff7304d6f7c484e" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "d352251c6998bb2ae65912f0922ebf52" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "3b169c1b3a7aaaba4d558fce0e2dcc1c" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "dcd1cd2695d90f02ef248d3d02b7f530" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "faec1a609f3c2b26c2920edd2dc3d8f0" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "d6c6c6b6693234fac42f045fb58f0b04" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "5991423e2b40b05591d7167766676650" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a20fae8c6c3e2579b264da59c2f7eb9f" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "29c33fd388d06322f2ae682f04bd2118" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "74f3a7b0c534ce976e1ee4b4f12d0f3e" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ae40fbb22dd6a63f65a8200d258c0b4d" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "3c14ad88595ba90977454528d741ebd7" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "5ffa7f3d7b040df7737d30d11970b07f" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "b3ddbaeeb64cde662fe0405d1f9c6408" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0dfe6549baf6fa68eef4b4f44a4dc4d1" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "73402b2ac7776502761e3decab577f36" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "0541fdbd842ec984704f2d5dd90d5b6e" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d6d6d55c7fb4984707102cd6f2110357" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "8feed7b6f03a588c38bb6f4d38a35d7e" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "a75a6a3dbfbf44378ae0a31e157480a9" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "6af5c762b573e9f39375a97c56a5fd25" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d2977fa30cebdd9f12b76ddad6daad8c" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "0ec8bbf5541a77485846871f976e3db6" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 11796480 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 27525120 } ], "md5sum": "74e73c2bd92671eb4e4f20bb9fafbb94" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 11796480, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 } ], "md5sum": "9dd49dab2766b83e3b17fa735feb8d53" } ] }