{ "metadata": { "ParamSize": 885, "ParamBytes": 45448593408.0, "BitsPerParam": 4.619993414045882 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 622854144, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 622854144, "byteOffset": 0 } ], "md5sum": "69623354f39f1b1fc3b73004ae2e4af2" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "1eff3949187cebc031d5c3c9b15dca2b" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ae3948436a625600d7f843611ec6fdf8" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2604ad79a768b00715be1c628ceaf52c" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "a05d62a377c19d54ec4b70f432c48887" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 622854144, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 622854144, "byteOffset": 0 } ], "md5sum": "86be08810928ea6eccec1b6ab8ecc41c" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "7690469695095fd3e675f9b2c53cdfec" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "5177bd70f69922c1879e5c93f31af9e0" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c3817d5d3ce53dca834956372bee1379" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3240116e10fafb3e65d7b013a168a899" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9a1d7d0af85d46eb37ee03b156dd8212" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e9988bfe3d21311b23261ee5096a7b36" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24645632, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15171584 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15187968 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15208448 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20451328 } ], "md5sum": "e85ff832b441ab74419273cbf814d665" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "840b485439c4eee2d4dd5fab22bd071c" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "cfc694518207cb7fb49f2455875341e4" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "66623e910f3af11788f4e7b9b4406f5f" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f04405d4aaf11918edfbd02a3a6ca7e4" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 30330880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15155200 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30294016 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 30310400 } ], "md5sum": "fb58e40a475915aa4a161d0227a1f0cd" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "19fa3cf9a7afe66839ee1a85e521cd9c" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "290007dcb3f737183980c37befbb78a5" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "236755d3b68ec7091706f689d6fb1076" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "789ebe8087f50b8be256f0258eb4ccfb" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d7a72373790c220d0b186e9b353b5dbc" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a7de1b99dd1633154051152cb9440758" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29872128, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 9453568 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24592384 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24608768 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24629248 } ], "md5sum": "78cb3aba2d5e643799b4c0586bea95b1" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b33dcfc10b3d4711211fa3d50b8da0ed" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f03992006d585318402cebd3f9bc3d37" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4b71084ea33ebfc3b3dc27bf275d4c99" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7881ae23370ca2e2fbe5b795540bd71c" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d9494307d1b62b9be2d33e8ae2536690" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9232c04b3588dcc1e6c53d715f523538" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 28856320, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4210688 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4227072 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 19382272 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 19402752 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24645632 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28839936 } ], "md5sum": "cc11769e540239985ecdcb4b6ae8e7ac" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "5e6a9af06b8b43389bf29e7193fb5714" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dbcc19a48347d81ee477cc4db3930e06" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8875787663642b47bd6b09ea38bf9ec2" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b0e0bb63a3b8d77fb5f53801a71fad9" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "bd33ccf69370d5ab365d2aa107d5b144" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "c2eb7139b7380425281c9fe2c8699ac8" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7753c76696f9809252ae1c053c03a485" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "aa38744f1299072056426cbd2328a8be" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "b7f31d79854d915a30f8d31606c6ff21" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8363e417af1b140e82140ea24d1b6c62" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 30314496, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15138816 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30277632 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 30294016 } ], "md5sum": "a6d5ddcd78185c874e536b8037086283" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a6612af08c818a1a6cddfbe350309c65" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "957e191a018c636bc552d03f8aeb507f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "bb0f546b701d3596d60756cf5ce8a62a" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "7028bf4a5c17d6543b37408f1055596f" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "683cfd1929bfaf339a38a06a0644da72" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f46b94af4e84cf7d67f4a6a5fe85975c" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29872128, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 9453568 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24592384 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24608768 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24629248 } ], "md5sum": "fae0c786423399124182673085584d26" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c90ca3c6a33c96d59edea6c2591ac51f" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "65415c71e917cbdaab5add6a08e9fe5c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "33af702c16a5877f94c4cd2d2df2ef2f" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "7858733cc3195d95a33c20a7da7079c4" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "7727e019a4b957da91ccf162bf859d72" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c4d7100080760e8e146c14137b49befc" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 28839936, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4210688 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4227072 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4247552 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 9490432 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 13684736 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28823552 } ], "md5sum": "c40ffbbb6a1c2e6cee830cfc06061648" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "20b7f3d4d4f91a26db257f5d9f847bf1" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "bce194c4ccf7b86b1c918bb1d0736b80" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "461d98129c181004bcfb9cd1befad5b0" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "550470917926b411752630f2917fe599" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e44432b7bdf7d0e0d82a17ea09da2b4b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0d7b648659dda3b4b58e394462548c84" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a3c390f428cd05596a934e501081f1c9" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e9717b40ce879fe134d7d9dbbd22c68" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "22ec41547de315c561db1794f8c6f2e4" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "aaace40f8389d53b99f7355dd92d92d8" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d27aff2e3832178d4748c7d0754cdf41" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "0428dba75e5f04f48a618d397ae3a203" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9d4543d05f63900263d679586194ca00" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1751a2391398d7cd311937020d1b9fbb" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dc3effc6ef6abaa9b55b3f2ef507a20d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7ed2b4aa60056db33dd84de65fdec731" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "0af5521109eb84f1f5b13e9b0c9ead86" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "803dc010d22bcfb368b0561ab32989b6" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "42c46936b92629bd9e780cd2ec8ceac5" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "de7523646d1d36fd3ee6f6db898aa92f" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "ae528746662fa5c9770e106d09e4979a" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "816c63d01ffd3f3fbc8fd09d1cd5943d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "82a391961adb167b54e1e2ab76f486e8" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "6233888fd1a1e179fcfc30ff78ddf429" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ffd8f348a340b8ebd94232cced61385d" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "896c4490b54db31564ce392166453173" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e6280db84db208c73a7615e423e559eb" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "45faf801872b2b8d99253cfd423939c7" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3acad7152ae34851097475aac4cc7e0e" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "c113e0db4d4e8ac6da6e87be1f983381" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c97fd97953d6401de1bf72b230bb08d2" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6b609f7ef21d72423329f53cd6af2037" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bf4f5a4462d408df434d8a125804ac04" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "126023cb2628b4e7eb04f01fd41616c4" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "466ba00517d4b088818a3d94da8b13f1" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "f196eccb807e95899fc5ad27dca9b77f" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3b23ca201c6f0262489b434115b0ffe4" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "59d2017d554470660470c4daecd694ec" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9242ad2f32c100bde2bb5eadfdc26273" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e1dc8090442c1cd324c4dc33764acba1" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "fb2c64e784fe1ee284d5aafa68378a2f" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "f42c8686b7412df541495a18cd4dd3ad" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "96cefe3c0a706281013198e42c2be972" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "26ffd395d44e7b7302fe27c740ae1681" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "209292a1e4bb55548ec30a209edc9f0c" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7254bb8e0825f6c4bb3700f0698e9ddc" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9b9453c8b483c2ba49f77fd2a7713e57" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "665c22aa5f01e91714938722763e7744" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "4c39a939b7705b78dc7e64989c6e0141" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3d0c18952e4ba9b67e7d17cc01dcb80c" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2374b250188f005bb20ea96dfcb945e7" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c75489c1c3a6db02b8239e890ec4ca24" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "523fdd761e74730ec970f610323d9c8a" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "6d8e8993b6560ca0dc808e5d545549ea" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e9aa89238e0c2f31d4dce191ce5ca22f" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "cbfa18a61046809f233cbf0bae4c7f19" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "da9054f2d9281768135073520887b761" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ffe745e31bdaa315ce5021bb1bdeae42" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ad64969a39552f6fa0b5828699247614" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "64fd008ebbe4f9d905423a21566ee6c7" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "17c6954312dd21511df535a6844ca083" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5ba0a9b6a1da5338a2e901869c6257cf" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "e50c62faaedb5bf88f8ecffb3c1e2c1f" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0be5160989ec1c4a3b3d0546d66c320e" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "63e4efacfa7b062df89d46ab3243d7d6" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "cea4996c10ae3cc38dbfd89ea256be1c" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "7dcd2d51061a5027dc3e1f0ddb93135e" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2bbf16e66f296969fd6cedd5bc80185d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "45bb784763b268e151e83da4bc0fd032" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b49452afbdabfde3d95f7711d3089c4" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "8ab752549504e349b985b8dac510913a" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0a8776da18b30602d4acd6a71991b911" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c96343d8b3caf0ae09768dadfb7ed199" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f51bcdfcd75cfc463cb2d3b4669b8cf0" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "6b8785004d12cf7e8b395e0efcbe0d48" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "497716051a0260852fc0fdfb9f4e66c6" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3e43895a44ba90edcec11d2d80da8170" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "39554a10de8eac0627333acc92285e48" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e0cf397ff3d2f94a8a57f0713ffed4e1" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d69a7d9837b5b6d82de6c5c0710d1792" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ea5b76311b3b61195ab00b0d0888cddb" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3823ab9e064e163e86511f5bf1b03cec" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c9bab509baef5a3d976620d69a0f734f" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "e63db724cb31c30c9411936078dfb2e1" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "598c68d81c5b1a64342bffb3fd21d81a" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "70d70c05919745292940892abd0587c3" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "313be29707f344cfae22e0ec4ebb1082" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "35bccee6acaef170066c6c25fe36bf4c" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "270f3814abe600fff93b45c70192afee" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "91e9e20d2ec9c1ab737af5f536ed2f20" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f3d1181063fce8beb50a04f5abe6b223" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6bca0117ae4f94c18e069a42a97f85f4" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a48421d623a9e180fdbe6c9b06a38744" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d1e6ee257f471cc2ecea28bc21d98e56" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9f64186444828de5ab62aaac0a59df54" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "0e670b9003a9fa034a525f585fd09469" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "181f6901f5ce8974f3496e35370961e0" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0966a504a65a0d987eb7285217afdd89" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "057790d454f285975ca7de59f333c2e4" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "16f959ce68eacc7ef67a0275ddcdfdb2" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f92799be5cf8110884909a58f6b0b542" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "124631958c84b0cf5321010690b0b660" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "6f39a09fa2bc391348c11e285c516410" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9069b7c9d344f8a908c3fddbd7931f2d" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "01ba42d58853406e16177511085ae4bc" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "3cf91f259846aba6d64f4c5b768a1130" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "40bc2053fd0c2161b0944ece22441e55" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ff21070ed212ae20ef8b9e3172187e89" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2c7be3c79976c795b34a6dfb82de18e0" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 28823552, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 19349504 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 19369984 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24612864 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28807168 } ], "md5sum": "004727cc386328b87a355d5f1e807612" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "be9f2fdd52f609c48dc8b45759663616" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d1bb30f1073b1750b1f3f0c2b003d980" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c958d834508f5e843772734a04ad8b0" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "013eed42d22cb29513fdca0620a7a527" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "177f7cf72e6c8043f1f68ff9044ddad5" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e13888864d811418bd16737ac5bd4fc" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "f49f6fbdb742405817e0026eefe7f1c2" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d5d07df5e24e67e273f5b60059472abf" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "647592a8a202213357b2266283fc445b" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8297d7766f18826b24355468dbcf4241" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e016631be962226b7470aa353c4e7b21" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "5064784cb5ed816d6a63c5db142757a8" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1b13f08bafe344691f4a5af2608bb82a" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d05f654e0745eb748b885ae02cf792e0" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d176f85ebb8be6bd4a40b2b66a92cc52" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "966231433fc862ede85ac7511901dbf9" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2429a3fd03b64c93f448629a06a3d14f" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "bc392b72c4d13f944a8cd3c64080ac97" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2a47ca7d9f53ad4a30deddd43a5aee0e" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "58a81cacad7e870993bf9c283e7ee874" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "f70e771bb4bde3f1a400a2bee17f12fa" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "8a6c2e2ff93a37912ae0ea39696cc8b3" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "346bab921753a13e021b577b5a48782b" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "996180612cb612745e44d2c9c6847a44" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d72ac5aa5da7d9f9050d7ef6ec065147" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f6c4fd38573baa56cab7b562bef945f0" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1a5ad946b79294c531c067fc72114505" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "19d6cd9caa86b3b0fb8190060606283c" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c07fed218d53e20dea04c0d7cdf9e897" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f4cda11ca0d0c949132da47e441a5649" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9377b86686ef72d1f5db74a6beee5083" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3747500e488a688976adbfc74f6c1666" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "491f5ebc8eaf6f934dae641c23aa10c4" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "965ac6d9477d19320a70b442dbb7d0c2" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "bdb4910b6fbd9663acd2d7f07650ad39" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "7e3412a93cac4def639dbb6082f6b8e5" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "814bdfca2a9f04639a1099f11dab885f" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d0431c6094aea33c21c23f6f67abb06f" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9726fe82ffe1fe5c8d19323e0a4a7b91" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e338641efb324b9ce4694ea9993e266f" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "07f94cb416a7927f11c34697f63145fb" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "519ffc0d0fad9aac9be2ed10098f8a33" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e6efb5a44f45242cef67ca18d465f9ea" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e1a4d473a51f48dbda6c9245adffc788" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "94c96e5142ce335340797cb752fa5cf7" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b1f69aea8f9028163236fef920e32df0" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3c675a435d18fe85d71663c27a3315fd" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "8394c37272bc67f892f12f7a6247be37" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d0a881e679bfaa6b1b1223dccca228e3" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "bfb67e1ac8257d2a1176075e756a9625" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d60fa9e7841b2930fe65ebab1024ed8c" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b5915a030bdbff8607c500569b007190" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3890e1fe2d18523428f9cde24a88bd4c" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "90afa14248edb1a1ca6c7bf5d5a55b90" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6ccd72910617128f0822c4df349eb750" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6963e6d52aa89b859e0b38e12539bc65" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7f646335bcc86d1c76e784076331f738" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8da5a2bd8c94c7d10d1b82f88086509c" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "31e6387cb9eda7ebba9bfd87c663cbbc" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "64e3eaf51efe66bd6c93742bf896a22e" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "6c031982a2575b00e50400d91ec30f5f" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "95f297c928d9f0abb9042bbe5acc1672" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f94f4957894c83a36a52aa7e9aed0a5a" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "609813ec9407a990470da5e4dd7b4191" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1fa4192705c9ba12f277a88ff3cfea1d" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "56bc387718639a8b78338b4e58a95490" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "085faa5ea146e8707376a70d0497305d" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ad2e2c85fedc76b5eba8a9502af6f00f" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "60b21a75e04afc6e60229f147b46923a" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3745640a3eac0eda2bdbac60c564cc9b" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "86e9eb38a2e616c2dace9d1720342b45" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "98a79c806c4d31659f2c02468a4cdf18" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f251c76c6fb18ef60db52a0b12be530a" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "10ba6b03d439ee093f2534e3db6ca147" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "85dbe359777deee887dc779e03e521f8" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "79da2dfc18a01ee599ff73e11ca48b1a" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ebf58c6ed20cec2fbfa521ac0af7bded" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "e4396ad635f6379a40af7cecaabcd551" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9fd8298dcd75720266ce767d93948ff5" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "556f12df1bbd5be7723bf7bd48456cef" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2182f1924d08d82bf17ac3ec7b74ff14" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "575a661aae7857f24ca66b9ee1ae797d" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "042543ba3639c6d4c7c3204e59e01e22" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "78d3a837c8d4f6ba71f41112ee929c57" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dbc8324a8623fdb7e9c1f48bb0994c73" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "50fc09e6cb13f22b487ff1f3b435832f" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "ae0833745cb560389fa06406965b18cf" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4fd1ebb0f0a8842e1b201e2c371667da" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f90ac6db698571089ef543dbcd1f00e9" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "223e1b46f76dd722f1d39ffefac864a1" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9375e5ce322c5c42af42d6ae8ad34f99" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "96c45d0ddc125fb8d2f509b9dac7c85a" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e456a698f739ad15e473ea81d87dc935" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4ac603ba858c46870e76e8ac7e9b387b" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "645023d15fb16cb082cd8e7118059bc9" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "088a2ebce95f296df248c77228261446" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "20463ed1b5335509459032611d019dd5" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e9e4991f58fe4bb4bccd209f3c9d8dc6" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "979cd80f68b9a7329299e1d91976d263" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d5b90cd620ae731ae642af363576612e" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0a6e7d32060299848eb2a9dbec77a4a7" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "994eeb4ea96959cc29883786ddc2a239" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "74820f6de2bc0f1b6512379ae681fefd" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8da9670b46ba81130fb63ee7e9967be2" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "51282d359ec3550bbb9715a0cee70ae4" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "87475d15847afbe4d6554f8395c24df9" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d398ef23cee00882e024679f6650d42f" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "20ecc773f3215ca6bfcd5dd6288bb6f3" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6fdd3ae9120a5585d1058f6c57206d21" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "87e2d9202e6bc31b302c52ea357ffbf0" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e85b26d52bf75b1bd8379c52c8fcfab8" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c2b74caa5a71e2e438170ed96c944cb4" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "da6cefaed28937bdb0a55b9a982ddb2a" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "08832fe53cff593dbe2857c60896fa8c" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "28982eaa0ea4ec003418940800539a0d" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0fe5b919e69e7b4a52897db16472c7a6" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "33dbf930a9c82072f878a29c86726a78" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0339b65736ac2f924685feae893d1068" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "48da6044e09af7b234f64607216769ca" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "7aba992aa3d3d66a4e5bdd955b8902be" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b7dc2367c52557c31566b1d9d0541219" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "96ca3160305793859bfba28e4caa30d9" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "64b4c80746b6df3df2e86f2bf0ecf9dc" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3185b81f3775445af0f99bba81379c1f" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "fdd3b9dcdea93b853f649301bed27686" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f5a2563b2c49a0a77ce0532bc99c619b" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6138779f1c94995bd60011803cf08c11" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a6f90fddbd35e5e7c01a7adde0d22c2c" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "26fd7dcee4721b023e060948560bd1e7" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5b203bfb08c68275896a577c0479d2b2" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c8d5bfcec784c6b4a9b51de6400e9563" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "efafa57a8802794cf8d0f2920b248a57" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7a2458f4366c3dde4cfe52f89efda353" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "12daf0f1ca2bec712842ff4ff64c8db7" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6c0a7e047973f3a949509694570af582" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5114b3cfb7b813b3767957c67c258e4b" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b8bcc298839506c6c3807b771c6ef697" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e67f678f28a956b7bc9af9c2e70b0f22" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "dc4f9bf0bbdf52d8fd5a7e4e108ed9af" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "13fba0ce5128c03172b3ffb7f5892718" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "01a1d2270f087220e732b81f7f9598c5" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "225343a6aeccce1db3bb0fc78333afb9" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "6c6dff8d9767e97c99579910d655ebf4" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1778c322f6a95b3bf337b19f118dfd91" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2860ac2798f005a0e241dcf8a8a4c49b" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "b2df0b5cad684baabcffe9a1a63d3590" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "eafa9ea8334a245f9347a1d9005363ae" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0098aa67dc082b9cf1ccf20394aef6d9" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "db95e1f3c78b6dbbfd2d08c6fdf25eba" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d71f03d9ccc44fe9980f5f1f2d6238be" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9bafc1a3aacc46fe3c49f853091f9442" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8584a8d94641ec4366f166281281451e" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "79b255a094aa44569a7b2755321eced3" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "124df672c056f8eda2b81dbe191c2142" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "c9f7fcd7f50fecac407524827ccbe7a6" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d31b39f0a5036ef3a398b72f19c62b3f" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "bfa9154c4e86ae182b8bd9bb6555da25" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "e317064e16f2ded3ce62a9d1667149f0" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f0d02968561ac461efd3d24d6e0162c5" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5869068f582204230f4ff6b3e90bfdf9" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "934d63f92869e2384cd13f841c28dae9" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "04c3c7a697209799e62292b1fc627cd4" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5bf58ebe2884457d3888db91d4a4f08f" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "1aa9aea243ffd23ff80e9d0e5abd59f0" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "bb418b0e14148ff73024cd5df73bda01" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4c4a62f4e81fa7036d5d0e6bc0756d62" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "191504acf30dd5de1dcdd14a905b97a1" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1510ace2975d58be052b978fa4290241" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b390f3e339d2744ff0e41f197ddc7888" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "cfab7563846bb74527cfcabc015d1aa7" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "592b95e9a67b0d2a720fc95ebb189487" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "515771b478d096e071853f0b4b30c3d4" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dfd44d5b2f344acab323435b09ccd58c" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "01b255d08fb549fda099f7c42400b373" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a2f359056d6be32fef5c27b2a58703db" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "3bad04cc24fd5b109217ea0a62be8a85" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a18b8d10f65fa77db9f8725e83f316af" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "04cda4c47b92b412fa3121443c82b91c" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e27dd053975964aed41cb2d188ee53b4" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "568f518ec2811cbbec9531a73b02fd94" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e181d19f4dfe727a09cb6aacb8f647d9" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6acb838708bef07f758605f7a1e9303b" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "80a27a28d1928a677f1cfb72060a4dcd" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "42bb7e78ba3c29afc70fb0e4b1bca422" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e81237b99a54eafa8a12c419ca84bb1e" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8e5062103493b9cf350ad526b2fe2d7b" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "49bd281d000f7a2ac966047c2963407d" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "fd52535d6c864eb909d164e493e48488" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "8a46729643d3af3110b1b46c173de461" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "386899b2a15c289ec58d438a5f55423b" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "14da2021f502f125ad296a7697185492" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "afeecaa99fd9dbce5db29f3e55f989ee" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "60a927269c4a18bb3a78cdf57f444066" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "16eca2cec4e7d4f62fb66d17920b2e6b" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c6a27961e4f0d6b20b1ca7458db2f73d" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c844f286524e2e461be8094b8fb4cb25" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "8af477b35fdf1f58eee4af94d1369e8f" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "05da42d55b3522f112372f566abdc618" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4b109a2995eaa1e8b43db48050834558" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "5f10677474fc89dccd0dd566c9a0729d" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e8952ef0336c8ecf48e0a109c6bf5770" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "deae6ee0177e32d683f517defa0450b4" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "981fb861b87e1f242303d0cb8ff06508" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7fc59a406ad777af8bd094f70f3d12f1" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "bf4e7ca7ab89dcad5f092bc2cd483f74" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "df0e81571ebf0f8191669e502d763624" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e6019cbfef0ce5041472d16283c0f4cb" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a56f3420b4f0861d8b793ebad6a42bcc" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "8856df7b9062b2122569d04d17fc1b05" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "703844cc3889ec995097ea2f930e1553" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "fdb81c674a1e5d2c13ae312001dcf9a9" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "f4ea5fcce63f50d7d204756ab5cc6ccd" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "002a3450981d71bd51de1d8e3fa49a98" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2637c93b83b9fafb326a0a7bfed082e8" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9e80bc67e433db1ffbc4695fa0f68228" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ff0f2d60dd6cf65e27a2f39113e91255" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "8e2added901975d6b2408621ea8bae28" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.64.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.64.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "e662375d17432880f7b5257ae1c705bd" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c855d6f8407308744ed7147e53338b30" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "05f0b184a46f64bd0820a78fa1ba4dbb" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "05895f449017391a82176faeb0661113" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "35202135b460c8f93b595602e11d1659" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "736eec54728115bff47097ace23e1920" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.65.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.65.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "de89ca3b61c0bb816ded6e9c61f0643c" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "26fc696eb7fbb9b559a0fea6275de9be" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "a37d144cd91864bc53421c1a109f7d35" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "63bfd3f434129ef21f2de74e843f5de2" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a08ea95369284e91d4708833129f650d" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7f795869c3eb8be208e2a46335291a89" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.66.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.66.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "10157d1b46e489a831fa66031ecfc3c1" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ba6ae882a7b1ec6747b4b0ced7d8f907" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "13c5f85618cc4407da1b14fd6653b138" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "db7c62b5afc64a40efabc0e3409847e3" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "985a62061328d3e7de03692cc80d06ab" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2050dc8c2673130097071d5ad141171e" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "64b0660863fcc99921ebcf6714311c0f" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.67.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.67.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.68.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.68.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "0e99b738beb0106cdd1fd075cafca5c0" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "66886225f09e0219d69ee1e0c1d3698d" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b5ce842903a77284da60f55cecb927b5" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "867aa78ae7dc273abe1e187847485545" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "8df2408499975a96e85801f47d0962b8" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "6845692e76bed2a3f3dd9c51089678c3" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a32c15f1e06e8a94a3df858a9c76b2aa" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9eeb41e2bf72812b371a342da5a8767a" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f812e92e6f128925432edf3466f5a52c" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3416cd1e1e565f93059aa5858494cea1" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "50c0bd3ec961a5364f3b30af07553e21" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "777fedea7cdaef877347caf9ea0c5d17" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8f449bd40b4909d5a0b45c208d31f94b" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "afb5d536d98e637ce09770644e951829" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.69.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.69.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.70.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.70.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "53d19d12e31fc90ea622ee1e5d7ef568" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c0569dadd700d0d5c23a8952487f8c37" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "78929f9802b96894abe332462c101abc" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "35bb8f08df58b36a86d4223dd2a09a9e" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "7e3926c1dd9fc9d1ae640d326c83f1bc" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2586f79b78f8750f399e04deee7ecee1" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8a4afcb8ee093e0f531e678ee5e4d0b7" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fcc414164d008a20f9a843f8ca7914aa" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "53f8bd67bcf5ff30da2f160766af4ca4" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0e8b2859c1d4695c49dc1fceca2c2a40" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "59177ff92c33f3928edfbbf7ab0a106a" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "38ca36a3d0f94be029ca6174abb5cd91" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "ba744fa37039f049605b153058134249" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "501972c5635364d074814db2dd0862a0" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "634d9538ce97e8a60e744bdff03eaf7a" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "a55bfcd929b8d2d59c7c4612f6a0f721" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "78568195355eb285572024b6529d32c3" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1a5c7cd99f996fdb1d0cf0d8fc1f5315" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "23420f8a4711ff4f0abe0d7ea442b346" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9078a57850bc4ce34e59295f6f94559d" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6201423f81f666ae02dbe939346af362" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dee17fd1059d10a2f493e018e4eb9093" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2a3f998f0ad1806bc77b421a5d20706a" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "359c66b22f1d75c6d34864ab6c8d26d2" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.71.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.71.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.72.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.72.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "c5b1faddbb90c1527ed5a9d575bf141e" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "447efcdff4d958e5a6f91fce9cf4a575" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6eccd428c79795cabb83b6b61be16a31" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "b721c13cbd27f686ba510360d4c51b9b" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d2cfca21beec6e1210828393cea5c83e" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "fe71a005cf9b3bb137aeb496f46d338c" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5c907574c1e608d2b737fee5dd1d3e60" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3e815f435e59a19b2e1618a60192ff80" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f1dbcb94ffedae91ceec5b9c3f1a6313" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.73.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.73.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "972f0d2daa88837d0c464b7cad7f8234" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "0e6b2f62d16d0cc2d2d05fae45ec3ea4" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "675f9fda9b4b3871a2c5c3ad14e84456" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3b3c8e1686655633110eaa6a78c9c411" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "024289610c61bc23b15fbc0c2f6dd5e9" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5605f345957c222dc4704ab5c6efc724" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.74.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.74.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "caf17a86fcc680146a57b2b3cb7e3909" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4d08a510ef6839a5f987a2a78789ba86" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "b981cdb56ae169046a84508971c39805" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cfd4bf30483f9fab82aae3c46edf4d2a" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f24c1a1d5dfe3d899f714fab63c376dc" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "28771f655018fd09106f86595c013b4b" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.75.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.75.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "65dbbafbbce046750d3e0106860e8a4a" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e9afbc408e704f3d5d561a6875451ea9" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8b6717f00c8e50c06cc41096929569a2" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "da6501b3f00cf2d7c748051840fbda1b" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aaa9b78f595d037dda86bd2030a30dd8" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cf850c012ab0b89f0e0dc52226aaf7a8" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cb7aebb0d036c9f73a160df5f71705b9" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.76.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.76.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.77.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.77.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "6f9e89134c8ab60f947edaf534c090a3" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6b64b0f56f02307349ff9a0f4fe891e1" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "62d260faa6ae57e40f02a2a905b52351" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "95eccbf08b80145101a37407b5fcb3aa" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "44c6f9b3fa76867569ef679d5a5aab39" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "46af5499164af67d987b0a3499c1c7b2" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ee3766cba64b9ba83e59250120dc9ca0" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "37f98d0d2c861c60bafa79f708c9fa68" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca2bfb62639b56966b5f41ee880279de" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7fec0f7f938b368687a4604ebc03f946" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f502aa052d14efbb23941dc9b910987e" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f052293ddaff03aeb422db0a4accf56a" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.78.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.78.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.79.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.79.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "570146d7d4ecbbc089d6d19feee3a900" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 4194304, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 } ], "md5sum": "ffbde232efd82dae300c4b34ce097781" } ] }