diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4311 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3048549376.0, + "BitsPerParam": 3.619307029695688 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 52736000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 412, + 32000 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 52736000, + "byteOffset": 0 + } + ], + "md5sum": "07f8e55ee35dd00e8f86c07782831d59" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "8c8924c4b1b18cb8c2c3a6317e9311b7" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "33efdffc1b6af95dbb3c7662daf303c6" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31492608, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 103, + 32000 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6592000, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6592000 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 6600192 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 24688128 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 26949120 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31484416 + } + ], + "md5sum": "014baaf8004e09d80979644eb5ec1c54" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "c338611843ea40c62cb24b6e4c93cbb6" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "8da9f9a4295e2d3f5a78aa0e84653166" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "ebff3a5b74713ed1c1598e05db64588f" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "bbd4cf405a5bc7bf3dc59196a10111d3" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "ede45b002a11271404cb56003536786f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "b93fee99e544bea6ae287d733ebd01d3" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "32a08f29bb221aece7e6d5af581be687" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "b89dfbc15b98c44fb1d0b8c3528bd92d" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "b0f904587eb07e55f953e91cda01f522" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "9ac9441c2d7a71eb10362af76fc21f45" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "fedd6aa7492daa7b84d3127cd5279e60" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "e47108f54248fcf890830ab17325cf10" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "f0072616dee633be7e0353a1ec666338" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "3c3feb0be5448e5de8a785c63bb07275" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "dade4b0908536a6fdccbfa2bc0d7d4f9" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "11d7e812a942b26379d13516250afe92" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "bc69fff619c6d4176ef39b17ba5601ef" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "f3f086b103d81adec7f7ccb3a5a82b34" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "40ee8443d3a9c5ca5ba0528d19e0d735" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "6089df8d8ea3e30dda7faca94fd6a7f2" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "c8f4c86e79a99b6c27598c1a293b4203" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 52736000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 52736000, + "byteOffset": 0 + } + ], + "md5sum": "d1ddcaea2b2921ddbe501527c197dd71" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 32290304, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6592000, + "byteOffset": 7602176 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14194176 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 14202368 + } + ], + "md5sum": "6a9299266ad2749026b9807da9fc79dd" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "0e176568fb4600bf8e2ae1171492c732" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29586432, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 2260992 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6796288 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 6804480 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 27055104 + } + ], + "md5sum": "cb5226c2789aa3b257c4c6c282ff74f8" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "cf6dda7feef58c9ebd891ce80e70696e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "6170f936c8fdb4d929d697cd2fe5f0bc" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "cb19c222e49ceb3d605f18e0dddf8c88" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "997cd72ab22e9e4c006ff36b4b535f89" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "41fc28efcf19bc0a4a957a4380cbc7b7" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "a58ef4bd17bf66d27d834579ec046177" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "7485fb56ad430d6dae9aeef0077691b5" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "f8eafaf364bf582016c90b99f5f64d03" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "4b31bad9b66b474a98a01bf6fbd2ea7d" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "0b49dd2ead668760eaba10a2550a62eb" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "37d6b78521b049226a2380cbaa3d12d6" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "17752870d30585ab75924279a5391832" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "3a25c3c068f14c0f4a921a9eef3baa3c" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "b399b8c263550a0477c24784f8fc90a0" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "29fb0d329829424c32f2c6e4cc96343c" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "7e2dd5062672a1f21c5ecdfb850b932d" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "a94778c911505dd60d440fb5fa77e8c6" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "aba8ad42e51ce1820f5ac17404bc6230" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "0bcdb8d6420f8cad7da4ee1f4e19d123" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "02bfa3e425b0a24930624a54c5dd2bee" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "cbe33aed3e352e75518ec0e2ec71887f" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "0640e445724c7d470b17fe6e3d2ab51e" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "b968a802eb13f012a8ac1daddd4b462c" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "334a6dfe5f04de64cd375266cb2e1028" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "671348351c8d3facaac89c7484fe67d0" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "059bc02c6f588a8eb3fb0d3a5df6823a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "7d59a720274bf7f3d2c0eb4c17c4d643" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "5ade31c6ff5fe904c3d8f16c36c19d50" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "82a3cca5de13afdc2e99d944a6a37d94" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "ac862c2f9287918adbf697ae3165b0a0" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "8aacd7df5d3591a37bf68a9107e3c9bc" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "913c0a41d8d09a7dc84f63931b85c2d9" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "b342f3390e81dc2d6ba4ee6f27aedf77" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "0e7e99aef4f64ae58475f8716ee278b8" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "c66da7d9d01dc75b10cc91988b5be9a9" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "2b0373dbce085c407c71cb8e81b94b56" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "2e46d6ad6badc60a785880df8e1ca7f2" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "554ec4304520e245f1bf638b7b2bd742" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "87ab2571248d7e00a9b3e7b34184cf6b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "0feafaa40f87d81befa4a3170da607ca" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "9bbb03718be11e47ec709a6479bc7379" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "ee095c22c5701beef624544e384c9002" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "b28aff858a0cf03441dd7136f93e5a35" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "78091e2424e35bb47df4bd08ba92fa6b" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "4a26feed4c8b74e73ba7d8a3393661ec" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "035a01e1016360fbc79ad49793c9489a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "274c758862ce360905927fd2724e31a1" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "925a02a7ff20c5f5f79ed6ed8181e296" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "e7c9d6604f23e96e464f2a63a4df91a1" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "c297ba633348549bc8684f5b5261ec57" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "f0448c76fffb6a19587f0af01c4ba60e" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "a57f89a172332fc8fb7c177c8bd5e0ed" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "94dee99ac1ad914c4edbe86c87b5b1b7" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "dc02e4323568bb8159364b46f0a0c490" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "a05c11d9bd4a3a39fd47c3fc5d0294f8" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "8fc34b2bd472b29ba356d948aebbadbf" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "e0c29c82d04fe7a8caf23d2ee849a106" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "4bacb5c87ad3fda5faf5e1ba6a9eb053" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "2fdc8c38070362b0fc8c399d3e2dc6af" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "5588b35b272d254821d56587b9bf1e08" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "196be6a9cb1621c3b17f7564e6261c91" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "26f3b40ee4efbf77a3ee80ce7b45d766" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "935a62e62c30d3f4e5bf045260c590d8" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "8e49bf3469a0e1197703bc3973179e53" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "31ffd7333f847aa2946757b68902806e" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "b1c53c33e63be8815d892e6b089ade7b" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 412, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "004dbf4412b46538d0adabd067931d97" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 412, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "9a498bb9fef5c24fad84780b62037db8" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1104, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 276, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 103, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "20f7b7d8f90fb15604cdc9c7c0d379e2" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 10125312, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 103, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 412, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 103, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + } + ], + "md5sum": "2ecbea5a6fcb4b97713059b075dff1ac" + } + ] +} \ No newline at end of file