diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4439 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3790741504.0, + "BitsPerParam": 4.500454373872803 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 512, + 32000 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "1f4a82ad5ec87bb98cd4d7673347efc3" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 30744576, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128, + 32000 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8192000 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 8200192 + } + ], + "md5sum": "81d6d4b356190fa4c86b679480fee5d8" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "de2a8096f9f88a04f9fc917b31b35542" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a46b65f36129a94ec8a314bd14f0e45a" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7a51f94474228a3603dd1159cb195198" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "7f2488d7655c6898befc96a0ddf4991d" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d84fc5872118bfef4f4472ebf0c90222" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "0783772a70e0fa6230b3e447aa848a09" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "18a05f56b718f1986184d907f0a718ab" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "555f0adc4b0452c2b51b452aada6dcf1" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "51b2a02bd98285adeb7af297965ff105" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6da0d51774ff7f94f2bb5553cb971715" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f6e9ff895bf608a9f106a055cc493758" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b0558cc841eea040680fc35faac2d47e" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "09f27256be296492adb6a6c9a18b6ddf" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "0a409d93188e0107f26ba157d35c9776" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "48e3049357ff339bf9310a77780aff35" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "79b2b179b75de456f710d9ec17b26668" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f3cce2df7a0d9653b2b3f87abaac7edd" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "47f870a04b3530b74a0c6c6d102822ca" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1eb88687975444b99fb3bae38ec9013e" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "5b3532ebb1f971324f5ba24ddc856759" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "7d730e39b8bfe7278d80df21ea6805dc" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a80cbd6925040bb26e7c02085c52d82a" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f69c18c2fd8f8409d84b6d986239061a" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "fe4da2cae6c81f4a3a1cae2913efdb4d" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "e515a526a305eb5c70e44322f08ab4e3" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e792c812a2f83e59966cc70faa4cbb84" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "871d6b82c4008b9cf57871fdd27675e3" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "66ad602d220b664a12421ccdb3ccb5d6" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "483c54e2e160eddd837a4019144bd5e3" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1c89fe862c3682d4cc69b09ebcfe699b" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e5bab33283337abfbd682081b2db2e88" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29253632, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 9445376 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17637376 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 17645568 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 20463616 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26099712 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 26107904 + } + ], + "md5sum": "b74a9f7644c81615d0988d73ec76e6d9" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "c535f1aaad32a09e35849148ffcc1b92" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5aefeeea789860db9e3ad56c7d89733b" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "69c5bc396194a94ae9ac00b5935d0f99" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "4ae43da98ae50c954748c5d08730dcc6" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f6434480e6fd566938648d9c43f93e34" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d458812004410aeb7f4ae0a5072012cf" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "589e8bbdcba634141ddc24a25294f483" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "773b32efade8b0718eaf965a643b7086" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "fa28b86f15481801f20752473c954418" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "fbcf5f226d251f37753b034dfee96277" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2dfdf1ac84afbcf0954b60e0cc76e10a" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1c43aa61841eab7cdd2edc7df34b0c62" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8344d8330071ebac836493a59eab6756" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "e9c79b2973e1e1d9af29fbc9b228bda0" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "524671d7fe32c01b39de156ca32ed894" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9ab63405afed09317510f1012f58ae07" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "556c6cb05446a52ae517279dd027bafa" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5798f20141abdabc97d5ae2f605d6dcf" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "79fdbe9fb9f237930ec977a7e08592be" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "81cf6f83afff87257b88654c09181bf6" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "d45b10a7096cbbd58c9baa80671d88b0" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "18a0991dfb41923ae239f6b20c3fb679" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b9540103ee1d6cc1e61d15d927ed3c1a" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "025fa4a93bac51c39ab33b87e6c1b363" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "028987c2c859527bd137121460398360" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5d6fe72dffad053303b76ca30d1013c5" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0ebda4c41079987db61ab9d8ca17dfb3" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "cd7ae7ce79d2e7c8869f23c2660bacae" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "476939d9881534eff6b57694a5a834b7" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "345229ee8ce546ebba476f73de0f8407" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "38e3ec6990b9d0fe8d245ea6d04f4666" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7d2042f62a22bc08800a32ede3842dc3" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "338c6d5b73d1f85ecb39546b6e57e662" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e43d959abf6d9426108f207058dd409d" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "fc7667184247c36cbbc420e66a9def18" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "117ca2daa7c3a8a98b65df4099e048b8" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8805ec87d3491bea769dd6ba3587b660" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b956b7391d8e339e9ac00098f5c105de" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7b7303d99779ca827a5259529a004c20" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "14293fcf561101c4b0f9aec0339e235d" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a3cb7fe3e7deff66d15ec071a9cf3497" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "cd8ae8f1c82356fb61fa38d724caf94a" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "00e0ffd692be7584bf0e86486c185157" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d434a758c7802e791d23e18fdd5b6ceb" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a711523252b94841546d52a3c588aedc" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "ae5a7d5de42488f11aa1b6a254b10796" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "27d8bb427f7a523a8172becb7cf2883d" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e4e749f189e5b7427f5d4849a12eddde" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "1cc9f636f68e7d34a1dae2718925381b" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "a786f855937e6afad2a090b94a0e22eb" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "17b7bb6e498cf01d81d58864184ada1d" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "831a2a3ca34fa20430997b792cfdbc95" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f2c4f6c9268ec7689c19e0b0136bac30" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2c9b5122c9a5009214b153f18bc40c06" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "90aba11280b2497e3d87c0d8cd039227" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "b4062413295d3c35b5415658f185fd56" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "a24cbc8d0d6cc845a3d6991187f03b32" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9bfa3601a15e70c8922fc4ce6a36f39d" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a4eb263c7f62778d1554e2d58a9f63f5" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "c50a6e70c1bfcff2c9ba3614c973407d" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "40b0bc31f299e13ed0842e4020347a6e" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ed68e5c9143bfa95652a3d311c35484a" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "d7990b18766dd5e6e32526459fba8b82" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "a4eab8b81fe6cfafc1665154ca1b6fec" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6b50403460078e0273a51d9d2bdc0e8d" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "616968fb8820358f35c02d2b97239fea" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "14f52006bb7abefc3111bd59cf78189e" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a3a28a5864560c6a7b307b9866d7804d" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0fe1c035d559119608334dc0541968ff" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "7f12986e9d183e6e1ec313b8c5b3c7e1" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "37d301fc44ac1f1732b257ebb2365bb2" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8eddca468f292014cfa9601653cbf756" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "179c48f789a4fc64840c8d62f607451d" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "dcbaddd846fac718bd1ff17657539e2c" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "32485e9750b0091db8e28d4929adf778" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ab4263984f50c21c92461c2cb840a186" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "5fbbbc2db1b3eeb5ce034c6f2d54bb60" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1376, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "140d84946a3ff33a1708765175266abe" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 22016 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "fb494bb043517319bb2f1cc1aa7f01a4" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "70d12a24fae27400c7de379be72e5e29" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 21045248, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 344, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 128, + 22016 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 128, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + } + ], + "md5sum": "e9089ddbaa539c246e3f09408d2d1e77" + } + ] +} \ No newline at end of file