{ "metadata": { "ParamSize": 149, "ParamBytes": 132848640.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 33304320, "records": [ { "name": "embeddings.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 0 }, { "name": "embeddings.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 768 }, { "name": "embeddings.position_embeddings.weight", "shape": [ 512, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 1536 }, { "name": "embeddings.token_type_embeddings.weight", "shape": [ 2, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1536, "byteOffset": 394752 }, { "name": "embeddings.word_embeddings.weight", "shape": [ 30522, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23440896, "byteOffset": 396288 }, { "name": "encoder.layer.0.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 23837184 }, { "name": "encoder.layer.0.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 23837952 }, { "name": "encoder.layer.0.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 23838720 }, { "name": "encoder.layer.0.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 23839488 }, { "name": "encoder.layer.0.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 24134400 }, { "name": "encoder.layer.0.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 24136704 }, { "name": "encoder.layer.0.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 25021440 }, { "name": "encoder.layer.0.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 25024512 }, { "name": "encoder.layer.0.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 26204160 }, { "name": "encoder.layer.0.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 26204928 }, { "name": "encoder.layer.0.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 26205696 }, { "name": "encoder.layer.0.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 26206464 }, { "name": "encoder.layer.1.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 27386112 }, { "name": "encoder.layer.1.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 27386880 }, { "name": "encoder.layer.1.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 27387648 }, { "name": "encoder.layer.1.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 27388416 }, { "name": "encoder.layer.1.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 27683328 }, { "name": "encoder.layer.1.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 27685632 }, { "name": "encoder.layer.1.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 28570368 }, { "name": "encoder.layer.1.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 28573440 }, { "name": "encoder.layer.1.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 29753088 }, { "name": "encoder.layer.1.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 29753856 }, { "name": "encoder.layer.1.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 29754624 }, { "name": "encoder.layer.1.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 29755392 }, { "name": "encoder.layer.10.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 30935040 }, { "name": "encoder.layer.10.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 30935808 }, { "name": "encoder.layer.10.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 30936576 }, { "name": "encoder.layer.10.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 30937344 }, { "name": "encoder.layer.10.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 31232256 }, { "name": "encoder.layer.10.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 31234560 }, { "name": "encoder.layer.10.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 32119296 }, { "name": "encoder.layer.10.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 32122368 }, { "name": "encoder.layer.10.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 33302016 }, { "name": "encoder.layer.10.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 33302784 }, { "name": "encoder.layer.10.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 33303552 } ], "md5sum": "12f41dfd857e82988a77aaa528e2af9f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33120000, "records": [ { "name": "encoder.layer.10.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 0 }, { "name": "encoder.layer.11.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 1179648 }, { "name": "encoder.layer.11.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 1180416 }, { "name": "encoder.layer.11.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 1181184 }, { "name": "encoder.layer.11.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 1181952 }, { "name": "encoder.layer.11.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 1476864 }, { "name": "encoder.layer.11.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 1479168 }, { "name": "encoder.layer.11.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 2363904 }, { "name": "encoder.layer.11.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 2366976 }, { "name": "encoder.layer.11.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 3546624 }, { "name": "encoder.layer.11.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 3547392 }, { "name": "encoder.layer.11.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 3548160 }, { "name": "encoder.layer.11.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 3548928 }, { "name": "encoder.layer.2.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 4728576 }, { "name": "encoder.layer.2.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 4729344 }, { "name": "encoder.layer.2.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 4730112 }, { "name": "encoder.layer.2.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 4730880 }, { "name": "encoder.layer.2.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 5025792 }, { "name": "encoder.layer.2.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 5028096 }, { "name": "encoder.layer.2.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 5912832 }, { "name": "encoder.layer.2.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 5915904 }, { "name": "encoder.layer.2.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 7095552 }, { "name": "encoder.layer.2.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 7096320 }, { "name": "encoder.layer.2.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 7097088 }, { "name": "encoder.layer.2.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 7097856 }, { "name": "encoder.layer.3.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 8277504 }, { "name": "encoder.layer.3.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 8278272 }, { "name": "encoder.layer.3.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 8279040 }, { "name": "encoder.layer.3.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 8279808 }, { "name": "encoder.layer.3.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 8574720 }, { "name": "encoder.layer.3.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 8577024 }, { "name": "encoder.layer.3.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 9461760 }, { "name": "encoder.layer.3.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 9464832 }, { "name": "encoder.layer.3.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 10644480 }, { "name": "encoder.layer.3.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 10645248 }, { "name": "encoder.layer.3.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 10646016 }, { "name": "encoder.layer.3.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 10646784 }, { "name": "encoder.layer.4.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 11826432 }, { "name": "encoder.layer.4.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 11827200 }, { "name": "encoder.layer.4.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 11827968 }, { "name": "encoder.layer.4.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 11828736 }, { "name": "encoder.layer.4.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 12123648 }, { "name": "encoder.layer.4.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 12125952 }, { "name": "encoder.layer.4.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 13010688 }, { "name": "encoder.layer.4.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 13013760 }, { "name": "encoder.layer.4.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 14193408 }, { "name": "encoder.layer.4.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 14194176 }, { "name": "encoder.layer.4.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 14194944 }, { "name": "encoder.layer.4.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 14195712 }, { "name": "encoder.layer.5.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 15375360 }, { "name": "encoder.layer.5.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 15376128 }, { "name": "encoder.layer.5.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 15376896 }, { "name": "encoder.layer.5.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 15377664 }, { "name": "encoder.layer.5.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 15672576 }, { "name": "encoder.layer.5.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 15674880 }, { "name": "encoder.layer.5.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 16559616 }, { "name": "encoder.layer.5.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 16562688 }, { "name": "encoder.layer.5.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 17742336 }, { "name": "encoder.layer.5.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 17743104 }, { "name": "encoder.layer.5.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 17743872 }, { "name": "encoder.layer.5.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 17744640 }, { "name": "encoder.layer.6.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 18924288 }, { "name": "encoder.layer.6.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 18925056 }, { "name": "encoder.layer.6.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 18925824 }, { "name": "encoder.layer.6.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 18926592 }, { "name": "encoder.layer.6.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 19221504 }, { "name": "encoder.layer.6.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 19223808 }, { "name": "encoder.layer.6.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 20108544 }, { "name": "encoder.layer.6.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 20111616 }, { "name": "encoder.layer.6.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 21291264 }, { "name": "encoder.layer.6.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 21292032 }, { "name": "encoder.layer.6.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 21292800 }, { "name": "encoder.layer.6.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 21293568 }, { "name": "encoder.layer.7.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 22473216 }, { "name": "encoder.layer.7.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 22473984 }, { "name": "encoder.layer.7.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 22474752 }, { "name": "encoder.layer.7.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 22475520 }, { "name": "encoder.layer.7.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 22770432 }, { "name": "encoder.layer.7.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 22772736 }, { "name": "encoder.layer.7.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 23657472 }, { "name": "encoder.layer.7.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 23660544 }, { "name": "encoder.layer.7.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 24840192 }, { "name": "encoder.layer.7.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 24840960 }, { "name": "encoder.layer.7.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 24841728 }, { "name": "encoder.layer.7.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 24842496 }, { "name": "encoder.layer.8.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 26022144 }, { "name": "encoder.layer.8.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 26022912 }, { "name": "encoder.layer.8.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 26023680 }, { "name": "encoder.layer.8.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 26024448 }, { "name": "encoder.layer.8.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 26319360 }, { "name": "encoder.layer.8.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 26321664 }, { "name": "encoder.layer.8.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 27206400 }, { "name": "encoder.layer.8.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 27209472 }, { "name": "encoder.layer.8.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 28389120 }, { "name": "encoder.layer.8.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 28389888 }, { "name": "encoder.layer.8.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 28390656 }, { "name": "encoder.layer.8.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 28391424 }, { "name": "encoder.layer.9.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 29571072 }, { "name": "encoder.layer.9.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 29571840 }, { "name": "encoder.layer.9.attention.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 29572608 }, { "name": "encoder.layer.9.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 294912, "byteOffset": 29573376 }, { "name": "encoder.layer.9.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 29868288 }, { "name": "encoder.layer.9.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 884736, "byteOffset": 29870592 }, { "name": "encoder.layer.9.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3072, "byteOffset": 30755328 }, { "name": "encoder.layer.9.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 30758400 }, { "name": "encoder.layer.9.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 31938048 }, { "name": "encoder.layer.9.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 31938816 }, { "name": "encoder.layer.9.output.dense.bias", "shape": [ 384 ], "dtype": "bfloat16", "format": "raw", "nbytes": 768, "byteOffset": 31939584 }, { "name": "encoder.layer.9.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1179648, "byteOffset": 31940352 } ], "md5sum": "4f150cdc4aff3c33d8a38c07d140b60b" } ] }