{ "metadata": { "ParamSize": 323, "ParamBytes": 184506240.0, "BitsPerParam": 4.079501826759035 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 32711040, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 49152, 120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23592960, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 49152, 30 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2949120, "byteOffset": 23592960 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 26542080 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 26544000 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 27772800 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 27774720 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30232320 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 30242560 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 30244480 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 31012480 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 31015680 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 31476480 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 31478400 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 31480320 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 32709120 } ], "md5sum": "bdc513266d19e329e8ea03c7fd646d72" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33323520, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2457600 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 2467840 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 2469760 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 3237760 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 3240960 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 3701760 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 3703680 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 3705600 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 4934400 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 4936320 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 7393920 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 7404160 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 7406080 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 8174080 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 8177280 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 8638080 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 8640000 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 8641920 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 9870720 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 9872640 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 12330240 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 12340480 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 12342400 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 13110400 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 13113600 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 13574400 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 13576320 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 13578240 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 14807040 }, { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 14808960 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17266560 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 17276800 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 17278720 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 18046720 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 18049920 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 18510720 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 18512640 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 18514560 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19743360 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 19745280 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22202880 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 22213120 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 22215040 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 22983040 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 22986240 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 23447040 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 23448960 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 23450880 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 24679680 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 24681600 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27139200 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 27149440 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 27151360 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 27919360 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 27922560 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 28383360 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 28385280 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 28387200 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 29616000 }, { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29617920 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32075520 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 32085760 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 32087680 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 32855680 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 32858880 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 33319680 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 33321600 } ], "md5sum": "b77eb312be4028519fb31c903fa3d657" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33318400, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 1228800 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 1230720 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 3688320 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 3698560 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 3700480 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 4468480 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 4471680 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 4932480 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 4934400 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 4936320 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 6165120 }, { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 6167040 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8624640 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 8634880 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 8636800 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 9404800 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 9408000 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 9868800 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 9870720 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9872640 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 11101440 }, { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 11103360 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13560960 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 13571200 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 13573120 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 14341120 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 14344320 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 14805120 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 14807040 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 14808960 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 16037760 }, { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 16039680 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18497280 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 18507520 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 18509440 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 19277440 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 19280640 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19741440 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19743360 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 19745280 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 20974080 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 20976000 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23433600 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 23443840 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 23445760 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 24213760 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 24216960 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 24677760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 24679680 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24681600 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 25910400 }, { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 25912320 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28369920 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 28380160 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 28382080 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 29150080 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 29153280 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 29614080 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 29616000 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 29617920 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 30846720 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 30848640 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 33306240 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 33316480 } ], "md5sum": "6af4acdb079e016a7ac1b73bbf19d6ec" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 32084480, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 768000 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 771200 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 1232000 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 1233920 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 1235840 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 2464640 }, { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 2466560 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4924160 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 4934400 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 4936320 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 5704320 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 5707520 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 6168320 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 6170240 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 6172160 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 7400960 }, { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 7402880 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 9860480 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 9870720 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 9872640 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 10640640 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 10643840 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 11104640 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 11106560 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 11108480 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 12337280 }, { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 12339200 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14796800 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 14807040 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 14808960 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 15576960 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 15580160 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 16040960 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 16042880 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 16044800 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 17273600 }, { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 17275520 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19733120 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19743360 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 19745280 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 20513280 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 20516480 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 20977280 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 20979200 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 20981120 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 22209920 }, { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 22211840 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 24669440 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 24679680 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 24681600 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 25449600 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 25452800 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 25913600 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 25915520 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 25917440 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 27146240 }, { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 27148160 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29605760 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 29616000 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 29617920 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 30385920 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 30389120 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 30849920 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 30851840 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 30853760 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 32082560 } ], "md5sum": "c3bf3bbc69d1b48f1bb91f3c4a6731bd" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33323520, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2457600 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 2467840 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 2469760 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 3237760 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 3240960 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 3701760 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 3703680 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 3705600 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 4934400 }, { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 4936320 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 7393920 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 7404160 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 7406080 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 8174080 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 8177280 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 8638080 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 8640000 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 8641920 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 9870720 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 9872640 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 12330240 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 12340480 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 12342400 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 13110400 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 13113600 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 13574400 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 13576320 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 13578240 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 14807040 }, { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 14808960 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17266560 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 17276800 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 17278720 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 18046720 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 18049920 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 18510720 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 18512640 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 18514560 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19743360 }, { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 19745280 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22202880 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 22213120 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 22215040 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 22983040 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 22986240 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 23447040 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 23448960 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 23450880 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 24679680 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 24681600 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27139200 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 27149440 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 27151360 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 27919360 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 27922560 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 28383360 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 28385280 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 28387200 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 29616000 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29617920 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32075520 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 32085760 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 32087680 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 32855680 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 32858880 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 33319680 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 33321600 } ], "md5sum": "327c6d17328d5d82339fbaf72f278b45" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 19745280, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 1228800 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 1230720 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 3688320 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 3698560 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 3700480 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 4468480 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 4471680 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 4932480 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 4934400 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 4936320 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 6165120 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 6167040 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8624640 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 8634880 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 8636800 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 9404800 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 9408000 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 9868800 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 9870720 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9872640 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 11101440 }, { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 11103360 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13560960 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 13571200 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 13573120 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 14341120 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 14344320 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 14805120 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 14807040 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2560, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 14808960 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 16037760 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 960, 2560 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 16039680 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18497280 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 18507520 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 960, 800 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 768000, "byteOffset": 18509440 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 1, 1600 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3200, "byteOffset": 19277440 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 960, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 460800, "byteOffset": 19280640 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19741440 }, { "name": "model.norm.weight", "shape": [ 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19743360 } ], "md5sum": "cbb03a273d447198c4cdbe6ddb1e84cf" } ] }