{ "metadata": { "ParamSize": 225, "ParamBytes": 694996992.0, "BitsPerParam": 4.500857951427515 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 66584576, "records": [ { "name": "lm_head.q_weight", "shape": [ 65024, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66584576, "byteOffset": 0 } ], "md5sum": "0f9fa0a19854bb49f3e6a74c54b1b44f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 66584576, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 65024, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 66584576, "byteOffset": 0 } ], "md5sum": "c4851353a8d2563e47c3bf8a0d5bbb43" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 23138304, "records": [ { "name": "lm_head.q_scale", "shape": [ 65024, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8323072, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 65024, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8323072, "byteOffset": 8323072 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16646144 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 16650240 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 22417408 } ], "md5sum": "4c6ed7223e2a4162776e7d8af803eebc" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "79dfe66878509fdfa072510efc71ca01" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "e7022aa5f2c11f49ae1eb3de1c1760e1" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "f45cff6d2687bdc703a34bb6437b709f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "3219e41edabbc8b1decc2baf5b8ea7e6" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "d7d5c74a612879f020e3c8c98899a45c" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "292c4dc65c8ca71cbc9752120bc93e91" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "c95244eeda3cf25438d388239bae2eb9" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "1d7b6cfc26c9d9fe95bfb528210928d7" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "93ec21beb9f888dffbb5d8d3ff12b987" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "252eb9f66784e3f186d332a2234b3b47" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "059a30652f62bc53f40deb3742b57dbf" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "c41e8ad7f31fe03e8156eda6593ac2c6" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "3dbcb76c12ca477d6df8ea1aa572674a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "bcd7ef15371c7bc90b12f293d4de7e71" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "a7bc7c31bdf390d7b42269dc738eb8c0" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "8b6140e47fae0999f13e399639b3f6de" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "bd6606704559e75d7958a06d3f9d37e9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "4e00b0e85bb81d8dd5c3e7a4b6f8a1fb" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "98bb60cf7f41ca949866554fac1d6b41" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "922f2d133b9adbd0699c0df7272ea578" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 24780800, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2048, 704 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 18292736 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2048, 176 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 720896, "byteOffset": 24059904 } ], "md5sum": "8dad1c13c9d46b44b149d8c8120c76d7" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 18292736, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 11264, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 11264, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 11534336 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12976128 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 12980224 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15601664 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 15929344 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18026496 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18288640 } ], "md5sum": "f3d45dacc534f3d71074aa3643a7860d" } ] }