diff --git a/mlc-chat-config.json b/mlc-chat-config.json
new file mode 100644
index 0000000000000000000000000000000000000000..93ea1ab28ce25928d6b3e18f0d6624701ce243b3
--- /dev/null
+++ b/mlc-chat-config.json
@@ -0,0 +1,42 @@
+{
+ "model_type": "minicpm_v",
+ "quantization": "q4f16_1",
+ "model_config": {
+ "hidden_size": 2304,
+ "intermediate_size": 5760,
+ "num_attention_heads": 36,
+ "num_hidden_layers": 40,
+ "rms_norm_eps": 1e-05,
+ "vocab_size": 122753,
+ "position_embedding_base": 10000.0,
+ "num_key_value_heads": 36,
+ "head_dim": 64,
+ "sliding_window_size": 768,
+ "prefill_chunk_size": 768,
+ "attention_sink_size": 4,
+ "tensor_parallel_shards": 1
+ },
+ "vocab_size": 122753,
+ "context_window_size": -1,
+ "sliding_window_size": 768,
+ "prefill_chunk_size": 768,
+ "attention_sink_size": 4,
+ "tensor_parallel_shards": 1,
+ "max_batch_size": 80,
+ "mean_gen_len": 128,
+ "max_gen_len": 512,
+ "shift_fill_factor": 0.3,
+ "temperature": 1.0,
+ "repetition_penalty": 1.0,
+ "top_p": 0.5,
+ "conv_template": "LM",
+ "pad_token_id": 0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "tokenizer_files": [
+ "tokenizer.model",
+ "tokenizer_config.json",
+ "tokenizer.json"
+ ],
+ "version": "0.1.0"
+}
\ No newline at end of file
diff --git a/ndarray-cache.json b/ndarray-cache.json
new file mode 100644
index 0000000000000000000000000000000000000000..b40d4e8c51a0286d36ecba9ae7ca218b82728874
--- /dev/null
+++ b/ndarray-cache.json
@@ -0,0 +1,10252 @@
+{
+ "metadata": {
+ "ParamSize": 898,
+ "ParamBytes": 2946802496.0,
+ "BitsPerParam": 7.478748540353469
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 28214784,
+ "records": [
+ {
+ "name": "llm.model.layers.36.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.36.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 4608
+ },
+ {
+ "name": "llm.model.layers.36.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6640128
+ },
+ {
+ "name": "llm.model.layers.36.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7469568
+ },
+ {
+ "name": "llm.model.layers.36.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8299008
+ },
+ {
+ "name": "llm.model.layers.36.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21570048
+ },
+ {
+ "name": "llm.model.layers.36.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23228928
+ },
+ {
+ "name": "llm.model.layers.36.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24887808
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 24892416
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 27546624
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 27878400
+ },
+ {
+ "name": "llm.model.layers.37.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 28210176
+ }
+ ],
+ "md5sum": "f8e67c90b09aac2224d5904e63e58c0e"
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.37.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.37.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.37.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.37.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.37.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.37.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.37.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "0396e3fb3c2e5390da2bd35437bd06c1"
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.37.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.38.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.38.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.38.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.38.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.38.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.38.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.38.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.38.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "11a00c2997f26c892a74297db6fea4ac"
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.38.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.39.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.39.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.39.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.39.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "ef0e9206b7a37cc3fbeac9010d160a8a"
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 29882880,
+ "records": [
+ {
+ "name": "llm.model.layers.39.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.39.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.39.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.39.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.norm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ },
+ {
+ "name": "resampler.attn.q_proj.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29869056
+ },
+ {
+ "name": "resampler.attn.k_proj.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29873664
+ },
+ {
+ "name": "resampler.attn.v_proj.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29878272
+ }
+ ],
+ "md5sum": "7daf6065d76a70a1f5b5f822f0c2ff6e"
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 31855104,
+ "records": [
+ {
+ "name": "resampler.attn.q_proj.weight",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 0
+ },
+ {
+ "name": "resampler.attn.k_proj.weight",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 10616832
+ },
+ {
+ "name": "resampler.attn.v_proj.weight",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 21233664
+ },
+ {
+ "name": "resampler.attn.out_proj.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 31850496
+ }
+ ],
+ "md5sum": "2b1c44924657428b1fb1b61f37568346"
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 31002624,
+ "records": [
+ {
+ "name": "resampler.attn.out_proj.weight",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 0
+ },
+ {
+ "name": "resampler.kv_proj.weight",
+ "shape": [
+ 2304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 5308416,
+ "byteOffset": 10616832
+ },
+ {
+ "name": "resampler.ln_kv.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15925248
+ },
+ {
+ "name": "resampler.ln_kv.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15929856
+ },
+ {
+ "name": "resampler.ln_post.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15934464
+ },
+ {
+ "name": "resampler.ln_post.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15939072
+ },
+ {
+ "name": "resampler.ln_q.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15943680
+ },
+ {
+ "name": "resampler.ln_q.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15948288
+ },
+ {
+ "name": "resampler.pos_embed_k",
+ "shape": [
+ 256,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1179648,
+ "byteOffset": 15952896
+ },
+ {
+ "name": "resampler.pos_embed",
+ "shape": [
+ 64,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 294912,
+ "byteOffset": 17132544
+ },
+ {
+ "name": "resampler.proj",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 17427456
+ },
+ {
+ "name": "resampler.query",
+ "shape": [
+ 64,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 294912,
+ "byteOffset": 28044288
+ },
+ {
+ "name": "vpm.blocks.0.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 28339200
+ },
+ {
+ "name": "vpm.blocks.0.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 28341504
+ },
+ {
+ "name": "vpm.blocks.0.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30995712
+ }
+ ],
+ "md5sum": "8c85b7bd0eead67f22ead18c9077ff53"
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.0.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.0.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.0.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.0.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.0.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.0.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.0.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.0.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.0.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.1.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.1.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.1.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "148154ffdd9609f708ea350a81447b4e"
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.1.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.1.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.1.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.1.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.1.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.1.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.1.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.1.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.1.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.10.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.10.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.10.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "ff874568c12fe5939349233756af547b"
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.10.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.10.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.10.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.10.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.10.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.10.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.10.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.10.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.10.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.11.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.11.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.11.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "b2d9b7389d8659c55bb0170fedc6a7c6"
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.11.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.11.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.11.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.11.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.11.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.11.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.11.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.11.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.11.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.12.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.12.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.12.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "0be2d0e1c41e3d8682563120ea67d256"
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.12.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.12.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.12.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.12.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.12.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.12.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.12.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.12.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.12.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.13.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.13.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.13.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "43ac6fdb63df609bf5c5b931ebfd02be"
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.13.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.13.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.13.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.13.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.13.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.13.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.13.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.13.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.13.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.14.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.14.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.14.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "1b73e96ed7955451d7330c3e239a029d"
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.14.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.14.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.14.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.14.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.14.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.14.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.14.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.14.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.14.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.15.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.15.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.15.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "90a63aa3cbe6aa65d8015c5d04c7df81"
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.15.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.15.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.15.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.15.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.15.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.15.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.15.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.15.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.15.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.16.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.16.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.16.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "13deb7bba1d6cf863c267af36ed5c328"
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.16.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.16.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.16.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.16.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.16.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.16.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.16.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.16.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.16.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.17.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.17.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.17.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "f57edbac216a7abc1c7245d760dce3ae"
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.17.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.17.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.17.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.17.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.17.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.17.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.17.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.17.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.17.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.18.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.18.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.18.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "01a87a1e2b9786b1f4f7bc035d1803a8"
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.18.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.18.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.18.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.18.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.18.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.18.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.18.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.18.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.18.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.19.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.19.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.19.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "d0f9d6766e8431dd36ad01804680ed57"
+ },
+ {
+ "dataPath": "params_shard_18.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.19.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.19.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.19.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.19.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.19.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.19.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.19.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.19.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.19.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.2.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.2.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.2.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "c3e939ecc6766101890195f9306541a7"
+ },
+ {
+ "dataPath": "params_shard_19.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.2.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.2.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.2.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.2.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.2.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.2.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.2.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.2.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.2.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.20.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.20.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.20.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "25ba703b48b99e3958a8c6e702260ac6"
+ },
+ {
+ "dataPath": "params_shard_20.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.20.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.20.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.20.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.20.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.20.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.20.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.20.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.20.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.20.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.21.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.21.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.21.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "f978226ef7ffc676b7e378522c2f5d86"
+ },
+ {
+ "dataPath": "params_shard_21.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.21.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.21.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.21.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.21.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.21.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.21.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.21.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.21.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.21.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.22.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.22.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.22.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "080440b5277f12b1a7a930179c07d844"
+ },
+ {
+ "dataPath": "params_shard_22.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.22.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.22.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.22.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.22.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.22.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.22.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.22.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.22.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.22.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.23.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.23.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.23.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "92fd2ed2d8f130fa7b1f7b113e64f241"
+ },
+ {
+ "dataPath": "params_shard_23.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.23.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.23.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.23.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.23.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.23.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.23.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.23.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.23.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.23.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.24.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.24.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.24.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "93cd0cdbc450f195c4e55677ae9ca295"
+ },
+ {
+ "dataPath": "params_shard_24.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.24.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.24.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.24.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.24.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.24.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.24.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.24.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.24.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.24.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.25.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.25.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.25.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "cd8b3cb526ddd49ceff2a2896d0972af"
+ },
+ {
+ "dataPath": "params_shard_25.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.25.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.25.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.25.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.25.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.25.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.25.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.25.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.25.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.25.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.3.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.3.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.3.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "439ac788d351c31e6f79c2295d29aa6f"
+ },
+ {
+ "dataPath": "params_shard_26.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.3.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.3.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.3.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.3.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.3.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.3.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.3.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.3.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.3.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.4.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.4.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.4.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "d10909380d314a2183f5fc41159f00cb"
+ },
+ {
+ "dataPath": "params_shard_27.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.4.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.4.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.4.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.4.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.4.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.4.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.4.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.4.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.4.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.5.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.5.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.5.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "27c258b5c7236a6c2350c509ca909a62"
+ },
+ {
+ "dataPath": "params_shard_28.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.5.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.5.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.5.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.5.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.5.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.5.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.5.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.5.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.5.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.6.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.6.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.6.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "310e92af988bfd0044aaa1254816ca45"
+ },
+ {
+ "dataPath": "params_shard_29.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.6.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.6.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.6.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.6.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.6.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.6.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.6.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.6.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.6.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.7.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.7.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.7.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "f5e5eb08f57b993dd2e9072bfeda8c91"
+ },
+ {
+ "dataPath": "params_shard_30.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.7.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.7.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.7.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.7.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.7.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.7.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.7.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.7.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.7.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.8.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.8.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.8.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "c15edd322f242ec96a3111bf8e438169"
+ },
+ {
+ "dataPath": "params_shard_31.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.8.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.8.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.8.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.8.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.8.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.8.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.8.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.8.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.8.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.9.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.9.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.9.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "7dcdf464555a600541ad94f5250da0e4"
+ },
+ {
+ "dataPath": "params_shard_32.bin",
+ "format": "raw-shard",
+ "nbytes": 565645824,
+ "records": [
+ {
+ "name": "llm.model.embed_tokens.weight",
+ "shape": [
+ 122753,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 565645824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "9c9e93be3d2b223f6cf7126456b43b3d"
+ },
+ {
+ "dataPath": "params_shard_33.bin",
+ "format": "raw-shard",
+ "nbytes": 29771680,
+ "records": [
+ {
+ "name": "vpm.blocks.9.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.9.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.9.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.9.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.9.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.9.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.9.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.9.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.9.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.norm.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.norm.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.patch_embed.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27820192
+ },
+ {
+ "name": "vpm.patch_embed.proj.weight",
+ "shape": [
+ 1152,
+ 3,
+ 14,
+ 14
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1354752,
+ "byteOffset": 27822496
+ },
+ {
+ "name": "vpm.pos_embed",
+ "shape": [
+ 1,
+ 256,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 589824,
+ "byteOffset": 29177248
+ },
+ {
+ "name": "llm.model.layers.0.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29767072
+ }
+ ],
+ "md5sum": "2545431e3e4a7829e259a041a05fe90d"
+ },
+ {
+ "dataPath": "params_shard_34.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.0.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.0.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.0.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.0.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.0.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.0.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.0.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "9cb06aa4114a486cbf008b291e8d1ec3"
+ },
+ {
+ "dataPath": "params_shard_35.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.0.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.1.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.1.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.1.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.1.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.1.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.1.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.1.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.1.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "09a771b23cfb9458100a86f5e74b89a2"
+ },
+ {
+ "dataPath": "params_shard_36.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.1.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.10.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.10.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.10.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.10.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "f89bb4518e25a890bb16ac9f3d3d435c"
+ },
+ {
+ "dataPath": "params_shard_37.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.10.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.10.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.10.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.10.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.11.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "5da6926dfee3c6739e4669f75da9100e"
+ },
+ {
+ "dataPath": "params_shard_38.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.11.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.11.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.11.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.11.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.11.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.11.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.11.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "247bd5028375a15c11b899e539906dad"
+ },
+ {
+ "dataPath": "params_shard_39.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.11.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.12.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.12.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.12.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.12.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.12.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.12.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.12.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.12.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "16fb3675150461f7a1bb9df13c5f1be0"
+ },
+ {
+ "dataPath": "params_shard_40.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.12.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.13.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.13.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.13.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.13.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "b48710093b51da2640f5e5f142361ef8"
+ },
+ {
+ "dataPath": "params_shard_41.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.13.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.13.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.13.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.13.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.14.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "0e8cbae975ca71dc44ae631f6b22c420"
+ },
+ {
+ "dataPath": "params_shard_42.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.14.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.14.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.14.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.14.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.14.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.14.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.14.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "648c566f3487af200e0dc9cb002f8224"
+ },
+ {
+ "dataPath": "params_shard_43.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.14.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.15.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.15.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.15.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.15.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.15.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.15.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.15.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.15.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "73c59912ad1af36101a6c62f8da3dd14"
+ },
+ {
+ "dataPath": "params_shard_44.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.15.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.16.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.16.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.16.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.16.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "e5ced9410954a7cec21110d51499e3bf"
+ },
+ {
+ "dataPath": "params_shard_45.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.16.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.16.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.16.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.16.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.17.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "626e7c4a5db1df0802d54809079a28e7"
+ },
+ {
+ "dataPath": "params_shard_46.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.17.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.17.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.17.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.17.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.17.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.17.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.17.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "9e32a2a7bb0c7f6efcc0e505665184c6"
+ },
+ {
+ "dataPath": "params_shard_47.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.17.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.18.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.18.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.18.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.18.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.18.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.18.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.18.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.18.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "a505e786f2899cc46243c8b0de7fda81"
+ },
+ {
+ "dataPath": "params_shard_48.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.18.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.19.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.19.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.19.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.19.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "e03554c06de67e4fb29b6608aaa31c36"
+ },
+ {
+ "dataPath": "params_shard_49.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.19.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.19.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.19.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.19.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.2.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "7910e56543d3cb5dcfb6c19dba018e68"
+ },
+ {
+ "dataPath": "params_shard_50.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.2.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.2.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.2.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.2.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.2.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.2.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.2.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "b081e9ce76406bece34c43f47cebf6f3"
+ },
+ {
+ "dataPath": "params_shard_51.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.2.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.20.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.20.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.20.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.20.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.20.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.20.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.20.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.20.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "0dae08317909d7818eef76970c9109e1"
+ },
+ {
+ "dataPath": "params_shard_52.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.20.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.21.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.21.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.21.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.21.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "bcda53a84ed1d07fc894f0241634e313"
+ },
+ {
+ "dataPath": "params_shard_53.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.21.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.21.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.21.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.21.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.22.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "fd15ad0bc572f879c32d8c3f20bd72fd"
+ },
+ {
+ "dataPath": "params_shard_54.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.22.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.22.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.22.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.22.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.22.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.22.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.22.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "362038292afbef8b0435d1ee4ddee06f"
+ },
+ {
+ "dataPath": "params_shard_55.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.22.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.23.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.23.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.23.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.23.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.23.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.23.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.23.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.23.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "4f5c574d3331360bfd1a18604c9960b4"
+ },
+ {
+ "dataPath": "params_shard_56.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.23.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.24.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.24.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.24.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.24.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "7013c8ed6b461fd9cda06229a8486a0b"
+ },
+ {
+ "dataPath": "params_shard_57.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.24.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.24.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.24.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.24.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.25.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "2516356e68d8bcf4959c14eb31a54a8f"
+ },
+ {
+ "dataPath": "params_shard_58.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.25.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.25.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.25.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.25.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.25.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.25.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.25.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "aba6756153304a0f6c56d5a52a3d4396"
+ },
+ {
+ "dataPath": "params_shard_59.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.25.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.26.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.26.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.26.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.26.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.26.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.26.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.26.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.26.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "3a7b731cf7e8a5069e88022a7cf3e11c"
+ },
+ {
+ "dataPath": "params_shard_60.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.26.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.27.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.27.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.27.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.27.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "afb75e39ac6e5ac35e0ca8ab552b57e0"
+ },
+ {
+ "dataPath": "params_shard_61.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.27.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.27.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.27.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.27.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.28.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "aac863feb913277c33dfa92115916151"
+ },
+ {
+ "dataPath": "params_shard_62.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.28.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.28.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.28.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.28.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.28.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.28.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.28.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "24aba01f9f4c5df71362bd9ebf41ebf5"
+ },
+ {
+ "dataPath": "params_shard_63.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.28.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.29.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.29.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.29.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.29.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.29.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.29.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.29.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.29.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "fa7c7715bb3094655b08b14c7d6e7c08"
+ },
+ {
+ "dataPath": "params_shard_64.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.29.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.3.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.3.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.3.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.3.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "ec13484b1d985a350d1238cb521b5807"
+ },
+ {
+ "dataPath": "params_shard_65.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.3.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.3.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.3.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.3.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.30.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "59205ebfc3293e21dc4c43fedcc30eec"
+ },
+ {
+ "dataPath": "params_shard_66.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.30.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.30.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.30.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.30.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.30.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.30.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.30.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "38966c64af05b7401433e5987d96cac6"
+ },
+ {
+ "dataPath": "params_shard_67.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.30.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.31.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.31.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.31.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.31.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.31.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.31.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.31.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.31.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "dacd9d1a9ea5a9ccdcb47a73bc800c17"
+ },
+ {
+ "dataPath": "params_shard_68.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.31.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.32.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.32.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.32.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.32.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "aedec8c57c9b1e15cd44789cf45627f3"
+ },
+ {
+ "dataPath": "params_shard_69.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.32.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.32.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.32.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.32.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.33.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "0c67a5997aa42dcfc54b03f5684d4da1"
+ },
+ {
+ "dataPath": "params_shard_70.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.33.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.33.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.33.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.33.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.33.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.33.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.33.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "d15eb41c6c16d399b515f4555a95dbbc"
+ },
+ {
+ "dataPath": "params_shard_71.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.33.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.34.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.34.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.34.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.34.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.34.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.34.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.34.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.34.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "e99e550c04372aa9cbd8156f6e210bc6"
+ },
+ {
+ "dataPath": "params_shard_72.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.34.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.35.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.35.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.35.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.35.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "f157489f9c33409e636c20eec6bfc5ab"
+ },
+ {
+ "dataPath": "params_shard_73.bin",
+ "format": "raw-shard",
+ "nbytes": 29864448,
+ "records": [
+ {
+ "name": "llm.model.layers.35.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.35.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.35.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.35.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ }
+ ],
+ "md5sum": "181f9e3a0e26184879b253e076f1ef9d"
+ },
+ {
+ "dataPath": "params_shard_74.bin",
+ "format": "raw-shard",
+ "nbytes": 33182208,
+ "records": [
+ {
+ "name": "llm.model.layers.36.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.4.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.4.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 9957888
+ },
+ {
+ "name": "llm.model.layers.4.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.4.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 17422848
+ },
+ {
+ "name": "llm.model.layers.4.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 18252288
+ },
+ {
+ "name": "llm.model.layers.4.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 31523328
+ }
+ ],
+ "md5sum": "5cc30afeeaebf637d2893a981e01d063"
+ },
+ {
+ "dataPath": "params_shard_75.bin",
+ "format": "raw-shard",
+ "nbytes": 23233536,
+ "records": [
+ {
+ "name": "llm.model.layers.4.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.4.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 1663488
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 9626112
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 10621440
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 11616768
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 14270976
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 14602752
+ },
+ {
+ "name": "llm.model.layers.5.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 14934528
+ },
+ {
+ "name": "llm.model.layers.5.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 14939136
+ },
+ {
+ "name": "llm.model.layers.5.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 21574656
+ },
+ {
+ "name": "llm.model.layers.5.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 22404096
+ }
+ ],
+ "md5sum": "77491e6bbf005e6343532a4b8dbbf7af"
+ },
+ {
+ "dataPath": "params_shard_76.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.5.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.5.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.5.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.5.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.6.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "feb31c390332005fc0163bf837360e1c"
+ },
+ {
+ "dataPath": "params_shard_77.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.6.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.6.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.6.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.6.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.6.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.6.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.6.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "4d0cdf86538ab534bfd941628699420f"
+ },
+ {
+ "dataPath": "params_shard_78.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.6.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.7.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.7.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.7.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.7.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.7.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.7.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.7.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.7.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "ff2ff8aea8a842c43ace1fc1f38f007e"
+ },
+ {
+ "dataPath": "params_shard_79.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.7.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.8.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.8.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.8.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.8.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "db6bc131658fb05704605d26bc8ae8b1"
+ },
+ {
+ "dataPath": "params_shard_80.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.8.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.8.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.8.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.8.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.9.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "a38f38f89dddc71d7a576dac8eb8758d"
+ },
+ {
+ "dataPath": "params_shard_81.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.9.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.9.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.9.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.9.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.9.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.9.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.9.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "d97db8366d54ce15478b837ffd00d36f"
+ },
+ {
+ "dataPath": "params_shard_82.bin",
+ "format": "raw-shard",
+ "nbytes": 5308416,
+ "records": [
+ {
+ "name": "llm.model.layers.9.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ }
+ ],
+ "md5sum": "ab2d9aa7d64339d68421417f5ddd983c"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/params_shard_0.bin b/params_shard_0.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1ca4d9ed740d6a9301139a4c24145240875a126c
--- /dev/null
+++ b/params_shard_0.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:496e568ddcdae92404dee93c34356d7838d4fc51358175fbe61861ac762782af
+size 28214784
diff --git a/params_shard_1.bin b/params_shard_1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..65d5c854fb5f0841011eb792bb2e51e7e272215c
--- /dev/null
+++ b/params_shard_1.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25daea934783c8cfcf2ed07d2585ae4e2f2729d904d10183764eca1a8276023b
+size 32850432
diff --git a/params_shard_10.bin b/params_shard_10.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9ebf1af8d458ec7e5ffe64a6866f6b3a011ced23
--- /dev/null
+++ b/params_shard_10.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:430a21019e62e7c53406fa71a4bf43852c45b3eec31fab80b683f7546a06cb8d
+size 30479008
diff --git a/params_shard_11.bin b/params_shard_11.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0a0d8bf99479c38a724dd46b1767327451f0ad10
--- /dev/null
+++ b/params_shard_11.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:217b32b5d6912ba53d483c568f8ba5f6739b2e2628ce8051ee592cb4cff8d8a0
+size 30479008
diff --git a/params_shard_12.bin b/params_shard_12.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6c49e084d852bc045e2fdbe65be3b81a19a04d62
--- /dev/null
+++ b/params_shard_12.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4441c15784d3a07b82c0d96b824f3776c010a0d64bd4fa310fa0a43be184ac6
+size 30479008
diff --git a/params_shard_13.bin b/params_shard_13.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4269141fa5b86d449081395a90cb3d7c39f32746
--- /dev/null
+++ b/params_shard_13.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a039d969bb5cbb99a8a22af56ba307c660006420e6b57c3998e1a6428180a347
+size 30479008
diff --git a/params_shard_14.bin b/params_shard_14.bin
new file mode 100644
index 0000000000000000000000000000000000000000..20e800d37a77588b625790d5489af66d4282b846
--- /dev/null
+++ b/params_shard_14.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:045efe03fee4323787e5a22b1084caa7120ee40748fd180cd4494fc0b8a78213
+size 30479008
diff --git a/params_shard_15.bin b/params_shard_15.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cbdb97fdf72b4a3920ab05bce6fa9b4b1cc7ba50
--- /dev/null
+++ b/params_shard_15.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0127df912475597c6347c09a064dd99ec1e4d421360268472a3019ad18e357fd
+size 30479008
diff --git a/params_shard_16.bin b/params_shard_16.bin
new file mode 100644
index 0000000000000000000000000000000000000000..904568363d16ef3097c49787bfd6f99addcb57b1
--- /dev/null
+++ b/params_shard_16.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1281874847a8a218b72a00d30910df6aecf884bb9db64c7e02852f3baba6c9f2
+size 30479008
diff --git a/params_shard_17.bin b/params_shard_17.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d6d331c7aab34765fd9c07378c91b3c857756a8e
--- /dev/null
+++ b/params_shard_17.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:221fb40fc016e8d45277bb0df2c029bb9d717db2aa70474a7e89ee69ffb37ca1
+size 30479008
diff --git a/params_shard_18.bin b/params_shard_18.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7fab0d8075588c12442f4b251ecfff3646d284d8
--- /dev/null
+++ b/params_shard_18.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:451b358be0e8509a7f75383ef81614b956d8ba84938350688775596876643635
+size 30479008
diff --git a/params_shard_19.bin b/params_shard_19.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2bff50ce69305d76413959041f255057fffa071d
--- /dev/null
+++ b/params_shard_19.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:995c3d8d16c0c41d1bebf0dc06af36d64ebd7daee7000378bd01404b73ca0e16
+size 30479008
diff --git a/params_shard_2.bin b/params_shard_2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e872d9c6c3f4e669fdc6035b1c5400905bde4e29
--- /dev/null
+++ b/params_shard_2.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91b907742837ee13352066f8d51fb2eb8ec1f422ae0e400ccc6699c15d018b25
+size 30200832
diff --git a/params_shard_20.bin b/params_shard_20.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8ec47445357d542bd950680e40803450550cc42
--- /dev/null
+++ b/params_shard_20.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d9f72e29078c4b9bce0a1415c0e1ddb1755253e9e5f10dee03d70c1d6bbf6e6
+size 30479008
diff --git a/params_shard_21.bin b/params_shard_21.bin
new file mode 100644
index 0000000000000000000000000000000000000000..17c1c7954d809bb98223c23b59d7fcd05f9461fa
--- /dev/null
+++ b/params_shard_21.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eefb854c81a7d1c8d556d08fc916b4db3b25feaa0ed4b658947124c1afd88aaa
+size 30479008
diff --git a/params_shard_22.bin b/params_shard_22.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ab3c15c120ac21c163cc86d63d666abc6c1a94ef
--- /dev/null
+++ b/params_shard_22.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcbf9ca42c6e64ae72dbd0748953fbf2df7ddc3a43b2609542d2a5264bdc0c34
+size 30479008
diff --git a/params_shard_23.bin b/params_shard_23.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9010d1a295caee7c86d718b017ee95dfb9051265
--- /dev/null
+++ b/params_shard_23.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:880ce4ef89d7544c1faccc8401d9ca94c2a3ff0f093ad2d5953c2a71e8fb55b4
+size 30479008
diff --git a/params_shard_24.bin b/params_shard_24.bin
new file mode 100644
index 0000000000000000000000000000000000000000..08cfecb508c6963ac8ab548c101c8bf52aee9d99
--- /dev/null
+++ b/params_shard_24.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e95cbe6de4e5f62981b0562ed9dd4b8050dda9fcc005aea93a19eac2eaf2f86
+size 30479008
diff --git a/params_shard_25.bin b/params_shard_25.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6eb983ad8b576b6d3f366317e52684bdff7a97e6
--- /dev/null
+++ b/params_shard_25.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc8d37952a69b16cd1ba7516b79c20621a266eb6c8eac16993edb3f560178151
+size 30479008
diff --git a/params_shard_26.bin b/params_shard_26.bin
new file mode 100644
index 0000000000000000000000000000000000000000..31ad22374b220affd87b51761f4e32fb57765d70
--- /dev/null
+++ b/params_shard_26.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7ef2b7e1cd7ff0899d5d11ebce44fbfd004c6eef8f95623eafb6696223d457b
+size 30479008
diff --git a/params_shard_27.bin b/params_shard_27.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3c6871071d5090d49417261182b65a4ccd30a502
--- /dev/null
+++ b/params_shard_27.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a98056a4c24f765ecef153a52a96eab02542ac83d7d7105ee28c98b483e1fa
+size 30479008
diff --git a/params_shard_28.bin b/params_shard_28.bin
new file mode 100644
index 0000000000000000000000000000000000000000..039d9af3be3cfcf5dfb672d20318ad218a02cd36
--- /dev/null
+++ b/params_shard_28.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2210b6a8a4842ca0e5ddc193f24be846e2f7fcd72ee63d9b0fcde80ef7b43384
+size 30479008
diff --git a/params_shard_29.bin b/params_shard_29.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c6a377657f3cba7526a6ac84c1f59c57572d291e
--- /dev/null
+++ b/params_shard_29.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7180d53a540b68a56d5cbe8ac58f300e5a5a7e428633694181b8ec4d008231a8
+size 30479008
diff --git a/params_shard_3.bin b/params_shard_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ce374ba3eef422817ad422823105c68c9f7a280
--- /dev/null
+++ b/params_shard_3.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25e37927aacebd0d37bc3329400fcebbc8fc57f433e9d82b64a229dd7361a3d3
+size 21570048
diff --git a/params_shard_30.bin b/params_shard_30.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ecb15085b08c7ceb5bbbdf36b83001b23e3db94c
--- /dev/null
+++ b/params_shard_30.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a196be2ebdfe4ceb8da3e5bf5d77e10a5f8e888c8a5f96d0e0888fcc2fc197b
+size 30479008
diff --git a/params_shard_31.bin b/params_shard_31.bin
new file mode 100644
index 0000000000000000000000000000000000000000..093ea38108e8733084bfc77030e1f2c96b394152
--- /dev/null
+++ b/params_shard_31.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf9772e40d5a9bd28bdc2ac176a6f5b6193ca84ceaca0cdbf22f7452bed7aa35
+size 30479008
diff --git a/params_shard_32.bin b/params_shard_32.bin
new file mode 100644
index 0000000000000000000000000000000000000000..879fddb2a0f3e44fa1934d62e3ce6e6bcfa7fbfd
--- /dev/null
+++ b/params_shard_32.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8f04587a411a6eafaa08f10499ce4ebb4f20fa7897e0342a90d88be3d4ac7a8
+size 565645824
diff --git a/params_shard_33.bin b/params_shard_33.bin
new file mode 100644
index 0000000000000000000000000000000000000000..300f706c10981ad4f3d2c6593705c64507e01785
--- /dev/null
+++ b/params_shard_33.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34620bb04cfdfb78f06ae3f5fcc7d06601bc175d72c07ec23820b538dbcf7e77
+size 29771680
diff --git a/params_shard_34.bin b/params_shard_34.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f417317e10032b679e40e8e9e1e22b65c96c8011
--- /dev/null
+++ b/params_shard_34.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2723eae16ce54fbeb87413f954719dd259912c72c46121b8c63eb99944049bf3
+size 32850432
diff --git a/params_shard_35.bin b/params_shard_35.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ef4a48f3aab1aa579399de7458558e5beaf76b55
--- /dev/null
+++ b/params_shard_35.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08e4f658fffd88ee62ada48a3d32ea6fce0415a0a62052d8e7eacef013678897
+size 30200832
diff --git a/params_shard_36.bin b/params_shard_36.bin
new file mode 100644
index 0000000000000000000000000000000000000000..61fbbe8fa941e69eaca412d9692419c642eabaad
--- /dev/null
+++ b/params_shard_36.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:378ac0db240581879d5bffb5a2a6547fccf669ba1580ade231927f69459ad18b
+size 21570048
diff --git a/params_shard_37.bin b/params_shard_37.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5b72f088871f362100a3a4f25a0adc6114276271
--- /dev/null
+++ b/params_shard_37.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e2b461a6e9de7bbf0e14aa2a479eb8f35a224b65568030be6d6d2396422ea8a
+size 29869056
diff --git a/params_shard_38.bin b/params_shard_38.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d1ada8a387b9586a404d1cf1a4afb6f04d4a452f
--- /dev/null
+++ b/params_shard_38.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c2d84ec8266932f00ffc7f7c2793f8a4a6b43dbb17b19cb383c63120764917d
+size 32850432
diff --git a/params_shard_39.bin b/params_shard_39.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5adb2e5cee5947ba44da4063c706afc6485b4ee3
--- /dev/null
+++ b/params_shard_39.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:042089bc900330354dbf879d5837e0beeec57bf2221cb4684570aa29aaf0f11c
+size 30200832
diff --git a/params_shard_4.bin b/params_shard_4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c73d62d431eb6effc3b8ef86f21746a32751a3d
--- /dev/null
+++ b/params_shard_4.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5ec2db7d16f00823af1e30f52f0b6bc2dfa83aab34cedc05e4f510c3835ba71
+size 29882880
diff --git a/params_shard_40.bin b/params_shard_40.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ddca9d331d0084ed0895b37e7c101114258e4d7d
--- /dev/null
+++ b/params_shard_40.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cfa0bf59908fe4a56bda7d82cc2ed1afefa6064d7dbc540e1fd4f7e7ed87dc0
+size 21570048
diff --git a/params_shard_41.bin b/params_shard_41.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e59990d556939570e9c10730ec153034fa1feb5e
--- /dev/null
+++ b/params_shard_41.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9f91a3fd0b66994dadb557ac9a3239652b728e13d98a45b27357e557c9c1b45
+size 29869056
diff --git a/params_shard_42.bin b/params_shard_42.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ae5dd5e423010e8953224f5fea1eece8c836829
--- /dev/null
+++ b/params_shard_42.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c36cdb141b0c1ac84e217cdb3b03413f0b55094d214183c16aaa402c6ce02065
+size 32850432
diff --git a/params_shard_43.bin b/params_shard_43.bin
new file mode 100644
index 0000000000000000000000000000000000000000..321effab17515bf52d40279bcfbc6106acaecef1
--- /dev/null
+++ b/params_shard_43.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:816b4821c673c3bde80dac49f70f590480bd29df79292e64e788eaa541708f90
+size 30200832
diff --git a/params_shard_44.bin b/params_shard_44.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a13709f6f532e21be34d57c51330a7a8352ec711
--- /dev/null
+++ b/params_shard_44.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c952423d58b370814dd7d007f01095098f73efc348660bd5806912a85821eef
+size 21570048
diff --git a/params_shard_45.bin b/params_shard_45.bin
new file mode 100644
index 0000000000000000000000000000000000000000..08552dfeed289c095522d0988274e9018b0ec6a7
--- /dev/null
+++ b/params_shard_45.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfc7986e84e524293b9717e01cb962eb669291997bfd94801c8cd8434fbbc73b
+size 29869056
diff --git a/params_shard_46.bin b/params_shard_46.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e3c36f4d868c62a3dd9c2a309e3f9bab000337cd
--- /dev/null
+++ b/params_shard_46.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ef09bd600672ae141f97f66a6ae57edee16858f4bdd2200dfd120c79a142612
+size 32850432
diff --git a/params_shard_47.bin b/params_shard_47.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b6a52b37d9b59e664f3327964949c7ea9c8ff929
--- /dev/null
+++ b/params_shard_47.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:362f771af2e7efcec6298386cda17f6e267d59cb2163253fd9639e75b87ac6e8
+size 30200832
diff --git a/params_shard_48.bin b/params_shard_48.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5cae05be9d9f5e89ab544d5efd0bdfe05bc3bf53
--- /dev/null
+++ b/params_shard_48.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad689aa3fbe0df342b9fbcd9595df851d810cc981bdeb8b659e292e430f28ca
+size 21570048
diff --git a/params_shard_49.bin b/params_shard_49.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0640469fab28fd9ac7d6b4fbb6f12629016a08d2
--- /dev/null
+++ b/params_shard_49.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0ea7ffdd08154d91c0af2ec4658befc1d19b083d88cd77854747a8f6c41363f
+size 29869056
diff --git a/params_shard_5.bin b/params_shard_5.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e6ec573e1426d5d04bba2211ae000c2f03909ad5
--- /dev/null
+++ b/params_shard_5.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db77a1db0896bfb0ecfb5b022c6bb9f7a73ec0d7192b31a5e1f3d301417fe619
+size 31855104
diff --git a/params_shard_50.bin b/params_shard_50.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0af5f92be001da0c76524328683c6c5ed267f23c
--- /dev/null
+++ b/params_shard_50.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67dbab89e9c415a59b2d4ea61c5dc59f1865ca9052661885f39988e997358ff0
+size 32850432
diff --git a/params_shard_51.bin b/params_shard_51.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a531f6e2021410338dccdcbb9eab216aaa782141
--- /dev/null
+++ b/params_shard_51.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:253a3eca509e0914e0388db14d8a1e24eed2631582217914312d87e4d22a6a34
+size 30200832
diff --git a/params_shard_52.bin b/params_shard_52.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1cff1cf3f4c7ce4462f780f26716be45c1e3511b
--- /dev/null
+++ b/params_shard_52.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6caca0fa4bb2b24cdbd91bfdb725c72da28790f581a10f0feb7dc3afec00e92
+size 21570048
diff --git a/params_shard_53.bin b/params_shard_53.bin
new file mode 100644
index 0000000000000000000000000000000000000000..48a85d2bb05d4dfe5a8b49303922c64b11f5e064
--- /dev/null
+++ b/params_shard_53.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccf18e637e0a1f8a06343dcf8bd894e7485da960d3ce7a2d4155203d08731279
+size 29869056
diff --git a/params_shard_54.bin b/params_shard_54.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f3c0cdcfb7a80a051b4ae33668375ecd2a320c0
--- /dev/null
+++ b/params_shard_54.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e20e30d9d6ac9916b1d62724d18faacd425c09f7b747989e96cbd74fa8e5ab3f
+size 32850432
diff --git a/params_shard_55.bin b/params_shard_55.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a41a9c4412054e460bfd76f8ef67d3ddb380eefa
--- /dev/null
+++ b/params_shard_55.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f095440ef590a73cd60cc007d052cb8739465ea4698e32f209ced5b11f068be
+size 30200832
diff --git a/params_shard_56.bin b/params_shard_56.bin
new file mode 100644
index 0000000000000000000000000000000000000000..18c610853d611608651ef22f4bf09dc9c79d4b16
--- /dev/null
+++ b/params_shard_56.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e3357cb3baac17a1ee27d5f2e82d517036e6669a6f811c7b03916d70df4bca8
+size 21570048
diff --git a/params_shard_57.bin b/params_shard_57.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fc149747b5ba2eb7fb0987ebadcf4aab17203aa6
--- /dev/null
+++ b/params_shard_57.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4e0a314fbbdc304ab78d5899c39ee9760c1c839878394a47c387a469535660b
+size 29869056
diff --git a/params_shard_58.bin b/params_shard_58.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3e50c2088988c7ceee5fecb66a1295f185690a7b
--- /dev/null
+++ b/params_shard_58.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18ba22fb41e1c0d9bec41df4d42257ca21aacece0d724a5b4449e1f593a0b6e5
+size 32850432
diff --git a/params_shard_59.bin b/params_shard_59.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7a97ae306aa8de029c890789d8f1f30a73f8f122
--- /dev/null
+++ b/params_shard_59.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48d5f7928ac74f3ff340df09d783912f80589e30218a42ab5359faabe14c3763
+size 30200832
diff --git a/params_shard_6.bin b/params_shard_6.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a0b17d6fac0cc79f9429287d234e305759eca157
--- /dev/null
+++ b/params_shard_6.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f62360f2ffa15483a392a21ddd49cb911ca4155608d05a1fc1b9e0c1e387e1a
+size 31002624
diff --git a/params_shard_60.bin b/params_shard_60.bin
new file mode 100644
index 0000000000000000000000000000000000000000..56dd982b14d44cb7861057c001445628371affa7
--- /dev/null
+++ b/params_shard_60.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:352e9813a72fcc56e38c86f03c055391173f68eb3c5c6b715a767d5e62cfec15
+size 21570048
diff --git a/params_shard_61.bin b/params_shard_61.bin
new file mode 100644
index 0000000000000000000000000000000000000000..809cc66bac3a90629c96695e1d5fe85cf84b824f
--- /dev/null
+++ b/params_shard_61.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6982b397a4a677c36fd94054e4e25a6fd665fd0f43f9cbf72ae53ec30f3c2d8b
+size 29869056
diff --git a/params_shard_62.bin b/params_shard_62.bin
new file mode 100644
index 0000000000000000000000000000000000000000..20debb2a325178c3b500f4d12743d0492e517360
--- /dev/null
+++ b/params_shard_62.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9938e8d2b4ff87c3aac563e0ccba46a92dfe81cbc03faa50f1e027864962ff82
+size 32850432
diff --git a/params_shard_63.bin b/params_shard_63.bin
new file mode 100644
index 0000000000000000000000000000000000000000..90646362c1cd4fc998c07c9a2167fab223895522
--- /dev/null
+++ b/params_shard_63.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e378153c653cd484a44bac8918a10870933e5b87c055ec175d1dffda3df8218
+size 30200832
diff --git a/params_shard_64.bin b/params_shard_64.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7619487bcf50f39dab46f77755fa78e50f6b2e9e
--- /dev/null
+++ b/params_shard_64.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc126e3b8fe1d29de5bcf1c375a7ee48da4b9cca22e8e2965035c2124ccd3c57
+size 21570048
diff --git a/params_shard_65.bin b/params_shard_65.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d6c0a524111e2fd934291ed16fe171a3353acaf
--- /dev/null
+++ b/params_shard_65.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d177ac7c4df9fd6e714dc8c0be1311d9ae1509aed3d0db5180473e04abfbed7d
+size 29869056
diff --git a/params_shard_66.bin b/params_shard_66.bin
new file mode 100644
index 0000000000000000000000000000000000000000..433adee506f24af8341f6c42a3168ad939a4d6c3
--- /dev/null
+++ b/params_shard_66.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:706fc01a835c67388aae59bed19b64ff62a0df06fe97cebe505977b85a6ad034
+size 32850432
diff --git a/params_shard_67.bin b/params_shard_67.bin
new file mode 100644
index 0000000000000000000000000000000000000000..91ec940841a5ca47ea298f9e0196530c443fc637
--- /dev/null
+++ b/params_shard_67.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96976c44e4bca4fc13371c3a90d29f74b4137f653dd356ff629c00ff2b4c72b0
+size 30200832
diff --git a/params_shard_68.bin b/params_shard_68.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2142a2a1829715b184bfa389183bad05a5f5db8d
--- /dev/null
+++ b/params_shard_68.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0eaf148059e0491941f44cb1a7355132f5dedbb980829b05f98734a8350ffae2
+size 21570048
diff --git a/params_shard_69.bin b/params_shard_69.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ab62dfdf32539b03b5ee9b3a6293be0975ac8ed2
--- /dev/null
+++ b/params_shard_69.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:867d5921346ce04486993bfb1e779dbf4bcd5a63582c2e8c0e77c4cdb60780b0
+size 29869056
diff --git a/params_shard_7.bin b/params_shard_7.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b8acd02bf855679fd160674161c776a17846ac7d
--- /dev/null
+++ b/params_shard_7.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4ce2996f451d3c25b8feb67b91bab9e9bde5851bef3a91647d1e8710d6045a7
+size 30479008
diff --git a/params_shard_70.bin b/params_shard_70.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cca337771bf09c02f0b139120d860fead09b90d2
--- /dev/null
+++ b/params_shard_70.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abbf0f8d216682cf4a14a58d06b70f43ad4c5af8155c7c26aefd705f24908c62
+size 32850432
diff --git a/params_shard_71.bin b/params_shard_71.bin
new file mode 100644
index 0000000000000000000000000000000000000000..971d4d57f277a12ea8d9f3af54cfad70e77f93a6
--- /dev/null
+++ b/params_shard_71.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b3423ff6934b1b6ec34cfdaa047298fbff0407fcf0e0fa8a2aa2927e2fc0615
+size 30200832
diff --git a/params_shard_72.bin b/params_shard_72.bin
new file mode 100644
index 0000000000000000000000000000000000000000..25833868c45c3dcc22f08ad5f62a8c34245cbd66
--- /dev/null
+++ b/params_shard_72.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a893d28d083ef0d32f2772d8cd7da34472a62e19704ef10d3744588c34440550
+size 21570048
diff --git a/params_shard_73.bin b/params_shard_73.bin
new file mode 100644
index 0000000000000000000000000000000000000000..733e2f23272e1b1d6ec37e89074a5cc05458f12d
--- /dev/null
+++ b/params_shard_73.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:368579b518180d24887b714a154858ac8c58c9cfad6d23c47225e9beeae5992f
+size 29864448
diff --git a/params_shard_74.bin b/params_shard_74.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8e6374dcb3fe6ad3337aa10a3ae5c26a4b79d4b
--- /dev/null
+++ b/params_shard_74.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:412e2b697ac6b0b1f95260f8e1aeebb0aabb577dde51ca9a3ef3f892b7c2e38b
+size 33182208
diff --git a/params_shard_75.bin b/params_shard_75.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9819fb7708a3d614c4a7a9740f2aa449f56a9d77
--- /dev/null
+++ b/params_shard_75.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f138a6e0bf24a2a834f055fbb9601364a691749401ba401cd5f6e6f0f18d1768
+size 23233536
diff --git a/params_shard_76.bin b/params_shard_76.bin
new file mode 100644
index 0000000000000000000000000000000000000000..058bdce99bd7cf97c0a5d562eab0eafd8edac109
--- /dev/null
+++ b/params_shard_76.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0c03dde625ee806aba1a95d126a94e8b93047bcad8d9cf34163721481a752d1
+size 29869056
diff --git a/params_shard_77.bin b/params_shard_77.bin
new file mode 100644
index 0000000000000000000000000000000000000000..78c3362feff398ee9c699beaf4514869123a7cf3
--- /dev/null
+++ b/params_shard_77.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5967b62d7a2638782fa8dc9084c9cb6f3caea27cf20e90fcf61d064c574c6d0e
+size 32850432
diff --git a/params_shard_78.bin b/params_shard_78.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f37e33d30df83e78431e30f8ae0a7e6707039ca5
--- /dev/null
+++ b/params_shard_78.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76e516a2fc2689415851e66e7a9a97e327b366d1f2a852e0462471b04ab27588
+size 30200832
diff --git a/params_shard_79.bin b/params_shard_79.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6aebabb049925753c8152e5c529121c4ee9116bc
--- /dev/null
+++ b/params_shard_79.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0001cf7722307fc80eba91aaff20750fffa9a69c9b7722005ffd31422af85f0
+size 21570048
diff --git a/params_shard_8.bin b/params_shard_8.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cdde64b08c514190061caf1da966c27d734de3aa
--- /dev/null
+++ b/params_shard_8.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:734a253981824d7fc3ac9f9f1a5c98631db11035756295e4914036680c294e55
+size 30479008
diff --git a/params_shard_80.bin b/params_shard_80.bin
new file mode 100644
index 0000000000000000000000000000000000000000..86b46b427f3148b89d79743d79bb2d036a4f79c7
--- /dev/null
+++ b/params_shard_80.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68afe9e2e6a310d69ba74db1523b8d6c251d441ac3f7632a0454712de0ca9558
+size 29869056
diff --git a/params_shard_81.bin b/params_shard_81.bin
new file mode 100644
index 0000000000000000000000000000000000000000..19bc424d25f4803e5f5c18273ab25cc166be7e72
--- /dev/null
+++ b/params_shard_81.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5032a7c473e25b41e2e7ded0a2152497991b0796b081c4826ac6f2c955f94f8f
+size 32850432
diff --git a/params_shard_82.bin b/params_shard_82.bin
new file mode 100644
index 0000000000000000000000000000000000000000..527524685c496936a3dd81cf65c68264db1537cd
--- /dev/null
+++ b/params_shard_82.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:134f9b8eccc9495d0b7b89e3d9418c6b485d25166315e9a9c193bac02ef1edf3
+size 5308416
diff --git a/params_shard_9.bin b/params_shard_9.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e71d29a487a550a2517d32e7e5aa7bf95c1a04c9
--- /dev/null
+++ b/params_shard_9.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33291985f824863a98360bbb46d2492c374edba0488fd85928f190519fde9b9f
+size 30479008
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1fc53a443292460080a2b90af205b558a4b927a3
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,294507 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 101,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 102,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 103,
+ "content": "[",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 104,
+ "content": "]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 105,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 106,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 107,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 108,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": {
+ "type": "Sequence",
+ "normalizers": [
+ {
+ "type": "Prepend",
+ "prepend": "▁"
+ },
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ }
+ ]
+ },
+ "pre_tokenizer": null,
+ "post_processor": {
+ "type": "TemplateProcessing",
+ "single": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ }
+ ],
+ "pair": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 1
+ }
+ },
+ {
+ "Sequence": {
+ "id": "B",
+ "type_id": 1
+ }
+ }
+ ],
+ "special_tokens": {
+ "": {
+ "id": "",
+ "ids": [
+ 1
+ ],
+ "tokens": [
+ ""
+ ]
+ }
+ }
+ },
+ "decoder": {
+ "type": "Sequence",
+ "decoders": [
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": "▁"
+ },
+ "content": " "
+ },
+ {
+ "type": "ByteFallback"
+ },
+ {
+ "type": "Fuse"
+ },
+ {
+ "type": "Strip",
+ "content": " ",
+ "start": 1,
+ "stop": 0
+ }
+ ]
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": "",
+ "continuing_subword_prefix": null,
+ "end_of_word_suffix": null,
+ "fuse_unk": true,
+ "byte_fallback": true,
+ "vocab": {
+ "": 0,
+ "": 1,
+ "": 2,
+ "": 3,
+ "": 4,
+ "\n": 5,
+ "\t": 6,
+ "
": 7,
+ "
": 8,
+ "": 9,
+ "": 10,
+ "": 11,
+ "
": 12,
+ "": 13,
+ " | | ": 14,
+ "": 15,
+ "": 16,
+ "": 17,
+ "": 18,
+ "": 21,
+ "": 22,
+ "
": 23,
+ "": 24,
+ "": 25,
+ "": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31,
+ "": 32,
+ "
": 33,
+ "
": 34,
+ "
": 35,
+ "": 36,
+ "": 37,
+ "": 38,
+ "
": 39,
+ "": 40,
+ "": 41,
+ "
": 42,
+ "": 43,
+ "
": 44,
+ "
": 45,
+ "": 46,
+ "": 47,
+ "
": 48,
+ "": 49,
+ "": 50,
+ "": 51,
+ "0": 52,
+ "1": 53,
+ "2": 54,
+ "3": 55,
+ "4": 56,
+ "5": 57,
+ "6": 58,
+ "7": 59,
+ "8": 60,
+ "9": 61,
+ "+": 62,
+ "-": 63,
+ "=": 64,
+ ",": 65,
+ "。": 66,
+ "!": 67,
+ "?": 68,
+ "、": 69,
+ ":": 70,
+ "¥": 71,
+ ".": 72,
+ "!": 73,
+ "?": 74,
+ "...": 75,
+ "。。。": 76,
+ "。。。。。。": 77,
+ "《": 78,
+ "》": 79,
+ "【": 80,
+ "】": 81,
+ "『": 82,
+ "』": 83,
+ "```": 84,
+ "": 86,
+ "---": 87,
+ "": 88,
+ ";": 89,
+ ".": 90,
+ "=": 91,
+ "<": 92,
+ ">": 93,
+ "-": 94,
+ "+": 95,
+ "%": 96,
+ "‼": 97,
+ "㊣": 98,
+ "/": 99,
+ "|": 100,
+ "": 101,
+ "": 102,
+ "[": 103,
+ "]": 104,
+ "": 105,
+ "": 106,
+ "": 107,
+ "": 108,
+ "": 109,
+ "": 110,
+ "": 111,
+ "": 112,
+ "": 113,
+ "": 114,
+ "": 115,
+ "": 116,
+ "": 117,
+ "": 118,
+ "": 119,
+ "": 120,
+ "": 121,
+ "": 122,
+ "": 123,
+ "": 124,
+ "": 125,
+ "": 126,
+ "": 127,
+ "": 128,
+ "": 129,
+ "": 130,
+ "": 131,
+ "": 132,
+ "": 133,
+ "": 134,
+ "": 135,
+ "": 136,
+ "": 137,
+ "": 138,
+ "": 139,
+ "": 140,
+ "": 141,
+ "": 142,
+ "": 143,
+ "": 144,
+ "": 145,
+ "": 146,
+ "": 147,
+ "": 148,
+ "": 149,
+ "": 150,
+ "": 151,
+ "": 152,
+ "": 153,
+ "": 154,
+ "": 155,
+ "": 156,
+ "": 157,
+ "": 158,
+ "": 159,
+ "": 160,
+ "": 161,
+ "": 162,
+ "": 163,
+ "": 164,
+ "": 165,
+ "": 166,
+ "": 167,
+ "": 168,
+ "": 169,
+ "": 170,
+ "": 171,
+ "": 172,
+ "": 173,
+ "": 174,
+ "": 175,
+ "": 176,
+ "": 177,
+ "": 178,
+ "": 179,
+ "": 180,
+ "": 181,
+ "": 182,
+ "": 183,
+ "": 184,
+ "": 185,
+ "": 186,
+ "": 187,
+ "": 188,
+ "": 189,
+ "": 190,
+ "": 191,
+ "": 192,
+ "": 193,
+ "": 194,
+ "": 195,
+ "": 196,
+ "": 197,
+ "": 198,
+ "": 199,
+ "": 200,
+ "": 201,
+ "": 202,
+ "": 203,
+ "": 204,
+ "": 205,
+ "": 206,
+ "": 207,
+ "": 208,
+ "": 209,
+ "": 210,
+ "": 211,
+ "": 212,
+ "": 213,
+ "": 214,
+ "": 215,
+ "": 216,
+ "": 217,
+ "": 218,
+ "": 219,
+ "": 220,
+ "": 221,
+ "": 222,
+ "": 223,
+ "": 224,
+ "": 225,
+ "": 226,
+ "": 227,
+ "": 228,
+ "": 229,
+ "": 230,
+ "": 231,
+ "": 232,
+ "": 233,
+ "": 234,
+ "": 235,
+ "": 236,
+ "": 237,
+ "": 238,
+ "": 239,
+ "": 240,
+ "": 241,
+ "": 242,
+ "": 243,
+ "": 244,
+ "": 245,
+ "": 246,
+ "": 247,
+ "": 248,
+ "": 249,
+ "": 250,
+ "": 251,
+ "": 252,
+ "": 253,
+ "": 254,
+ "": 255,
+ "": 256,
+ "": 257,
+ "": 258,
+ "": 259,
+ "": 260,
+ "": 261,
+ "": 262,
+ "": 263,
+ "": 264,
+ "": 265,
+ "": 266,
+ "": 267,
+ "": 268,
+ "": 269,
+ "": 270,
+ "": 271,
+ "": 272,
+ "": 273,
+ "": 274,
+ "": 275,
+ "": 276,
+ "": 277,
+ "": 278,
+ "": 279,
+ "": 280,
+ "": 281,
+ "": 282,
+ "": 283,
+ "": 284,
+ "": 285,
+ "": 286,
+ "": 287,
+ "": 288,
+ "": 289,
+ "": 290,
+ "": 291,
+ "": 292,
+ "": 293,
+ "": 294,
+ "": 295,
+ "": 296,
+ "": 297,
+ "": 298,
+ "": 299,
+ "": 300,
+ "": 301,
+ "": 302,
+ "": 303,
+ "": 304,
+ "": 305,
+ "": 306,
+ "": 307,
+ "": 308,
+ "": 309,
+ "": 310,
+ "": 311,
+ "": 312,
+ "": 313,
+ "": 314,
+ "": 315,
+ "": 316,
+ "": 317,
+ "": 318,
+ "": 319,
+ "": 320,
+ "": 321,
+ "": 322,
+ "": 323,
+ "": 324,
+ "": 325,
+ "": 326,
+ "": 327,
+ "": 328,
+ "": 329,
+ "": 330,
+ "": 331,
+ "": 332,
+ "": 333,
+ "": 334,
+ "": 335,
+ "": 336,
+ "": 337,
+ "": 338,
+ "": 339,
+ "": 340,
+ "": 341,
+ "": 342,
+ "": 343,
+ "": 344,
+ "": 345,
+ "": 346,
+ "": 347,
+ "": 348,
+ "": 349,
+ "": 350,
+ "": 351,
+ "": 352,
+ "": 353,
+ "": 354,
+ "": 355,
+ "": 356,
+ "": 357,
+ "": 358,
+ "": 359,
+ "": 360,
+ "": 361,
+ "