roberta-multi-cls-model-q0f32-MLC / ndarray-cache.json
jykim310's picture
repo init
b8b7770 verified
raw
history blame
78.6 kB
{
"metadata": {
"ParamSize": 205,
"ParamBytes": 500957200.0,
"BitsPerParam": 32.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 77207040,
"records": [
{
"name": "roberta.embeddings.word_embeddings.weight",
"shape": [
50265,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 77207040,
"byteOffset": 0
}
],
"md5sum": "0ee50f5a56c71ab263fc9675afaa5355"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 32699912,
"records": [
{
"name": "latency_classifier.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 0
},
{
"name": "latency_classifier.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 1536
},
{
"name": "latency_classifier.out_proj.bias",
"shape": [
2
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4,
"byteOffset": 1181184
},
{
"name": "latency_classifier.out_proj.weight",
"shape": [
2,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 1181188
},
{
"name": "quality_classifier.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1184260
},
{
"name": "quality_classifier.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 1185796
},
{
"name": "quality_classifier.out_proj.bias",
"shape": [
2
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4,
"byteOffset": 2365444
},
{
"name": "quality_classifier.out_proj.weight",
"shape": [
2,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 2365448
},
{
"name": "roberta.embeddings.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 2368520
},
{
"name": "roberta.embeddings.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 2370056
},
{
"name": "roberta.embeddings.position_embeddings.weight",
"shape": [
514,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 789504,
"byteOffset": 2371592
},
{
"name": "roberta.embeddings.token_type_embeddings.weight",
"shape": [
1,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 3161096
},
{
"name": "roberta.encoder.layer.0.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 3162632
},
{
"name": "roberta.encoder.layer.0.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 3164168
},
{
"name": "roberta.encoder.layer.0.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 3165704
},
{
"name": "roberta.encoder.layer.0.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 3167240
},
{
"name": "roberta.encoder.layer.0.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4346888
},
{
"name": "roberta.encoder.layer.0.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 4348424
},
{
"name": "roberta.encoder.layer.0.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 5528072
},
{
"name": "roberta.encoder.layer.0.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 5529608
},
{
"name": "roberta.encoder.layer.0.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 6709256
},
{
"name": "roberta.encoder.layer.0.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 6710792
},
{
"name": "roberta.encoder.layer.0.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7890440
},
{
"name": "roberta.encoder.layer.0.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 7896584
},
{
"name": "roberta.encoder.layer.0.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12615176
},
{
"name": "roberta.encoder.layer.0.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12616712
},
{
"name": "roberta.encoder.layer.0.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12618248
},
{
"name": "roberta.encoder.layer.0.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 12619784
},
{
"name": "roberta.encoder.layer.1.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 17338376
},
{
"name": "roberta.encoder.layer.1.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 17339912
},
{
"name": "roberta.encoder.layer.1.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 17341448
},
{
"name": "roberta.encoder.layer.1.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17342984
},
{
"name": "roberta.encoder.layer.1.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18522632
},
{
"name": "roberta.encoder.layer.1.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 18524168
},
{
"name": "roberta.encoder.layer.1.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 19703816
},
{
"name": "roberta.encoder.layer.1.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 19705352
},
{
"name": "roberta.encoder.layer.1.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 20885000
},
{
"name": "roberta.encoder.layer.1.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 20886536
},
{
"name": "roberta.encoder.layer.1.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 22066184
},
{
"name": "roberta.encoder.layer.1.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22072328
},
{
"name": "roberta.encoder.layer.1.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26790920
},
{
"name": "roberta.encoder.layer.1.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26792456
},
{
"name": "roberta.encoder.layer.1.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26793992
},
{
"name": "roberta.encoder.layer.1.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26795528
},
{
"name": "roberta.encoder.layer.10.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 31514120
},
{
"name": "roberta.encoder.layer.10.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 31515656
},
{
"name": "roberta.encoder.layer.10.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 31517192
},
{
"name": "roberta.encoder.layer.10.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 31518728
},
{
"name": "roberta.encoder.layer.10.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 32698376
}
],
"md5sum": "e9336bafc416e4226095927ad9be91bb"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 31899648,
"records": [
{
"name": "roberta.encoder.layer.10.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 0
},
{
"name": "roberta.encoder.layer.10.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1179648
},
{
"name": "roberta.encoder.layer.10.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 1181184
},
{
"name": "roberta.encoder.layer.10.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 2360832
},
{
"name": "roberta.encoder.layer.10.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 2362368
},
{
"name": "roberta.encoder.layer.10.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3542016
},
{
"name": "roberta.encoder.layer.10.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 3548160
},
{
"name": "roberta.encoder.layer.10.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 8266752
},
{
"name": "roberta.encoder.layer.10.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 8268288
},
{
"name": "roberta.encoder.layer.10.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 8269824
},
{
"name": "roberta.encoder.layer.10.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 8271360
},
{
"name": "roberta.encoder.layer.11.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12989952
},
{
"name": "roberta.encoder.layer.11.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12991488
},
{
"name": "roberta.encoder.layer.11.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12993024
},
{
"name": "roberta.encoder.layer.11.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 12994560
},
{
"name": "roberta.encoder.layer.11.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14174208
},
{
"name": "roberta.encoder.layer.11.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 14175744
},
{
"name": "roberta.encoder.layer.11.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 15355392
},
{
"name": "roberta.encoder.layer.11.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 15356928
},
{
"name": "roberta.encoder.layer.11.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 16536576
},
{
"name": "roberta.encoder.layer.11.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 16538112
},
{
"name": "roberta.encoder.layer.11.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17717760
},
{
"name": "roberta.encoder.layer.11.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17723904
},
{
"name": "roberta.encoder.layer.11.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22442496
},
{
"name": "roberta.encoder.layer.11.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22444032
},
{
"name": "roberta.encoder.layer.11.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22445568
},
{
"name": "roberta.encoder.layer.11.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 22447104
},
{
"name": "roberta.encoder.layer.2.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 27165696
},
{
"name": "roberta.encoder.layer.2.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 27167232
},
{
"name": "roberta.encoder.layer.2.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 27168768
},
{
"name": "roberta.encoder.layer.2.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 27170304
},
{
"name": "roberta.encoder.layer.2.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28349952
},
{
"name": "roberta.encoder.layer.2.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 28351488
},
{
"name": "roberta.encoder.layer.2.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29531136
},
{
"name": "roberta.encoder.layer.2.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 29532672
},
{
"name": "roberta.encoder.layer.2.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30712320
},
{
"name": "roberta.encoder.layer.2.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 30713856
},
{
"name": "roberta.encoder.layer.2.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31893504
}
],
"md5sum": "492d8fb1932ad7de10537a641e235a07"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33074688,
"records": [
{
"name": "roberta.encoder.layer.2.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "roberta.encoder.layer.2.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4718592
},
{
"name": "roberta.encoder.layer.2.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4720128
},
{
"name": "roberta.encoder.layer.2.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4721664
},
{
"name": "roberta.encoder.layer.2.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4723200
},
{
"name": "roberta.encoder.layer.3.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9441792
},
{
"name": "roberta.encoder.layer.3.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9443328
},
{
"name": "roberta.encoder.layer.3.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9444864
},
{
"name": "roberta.encoder.layer.3.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9446400
},
{
"name": "roberta.encoder.layer.3.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 10626048
},
{
"name": "roberta.encoder.layer.3.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 10627584
},
{
"name": "roberta.encoder.layer.3.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 11807232
},
{
"name": "roberta.encoder.layer.3.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 11808768
},
{
"name": "roberta.encoder.layer.3.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12988416
},
{
"name": "roberta.encoder.layer.3.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 12989952
},
{
"name": "roberta.encoder.layer.3.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14169600
},
{
"name": "roberta.encoder.layer.3.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 14175744
},
{
"name": "roberta.encoder.layer.3.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18894336
},
{
"name": "roberta.encoder.layer.3.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18895872
},
{
"name": "roberta.encoder.layer.3.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18897408
},
{
"name": "roberta.encoder.layer.3.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18898944
},
{
"name": "roberta.encoder.layer.4.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23617536
},
{
"name": "roberta.encoder.layer.4.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23619072
},
{
"name": "roberta.encoder.layer.4.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23620608
},
{
"name": "roberta.encoder.layer.4.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 23622144
},
{
"name": "roberta.encoder.layer.4.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24801792
},
{
"name": "roberta.encoder.layer.4.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24803328
},
{
"name": "roberta.encoder.layer.4.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 25982976
},
{
"name": "roberta.encoder.layer.4.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 25984512
},
{
"name": "roberta.encoder.layer.4.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 27164160
},
{
"name": "roberta.encoder.layer.4.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 27165696
},
{
"name": "roberta.encoder.layer.4.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28345344
},
{
"name": "roberta.encoder.layer.4.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28351488
},
{
"name": "roberta.encoder.layer.4.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33070080
},
{
"name": "roberta.encoder.layer.4.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33071616
},
{
"name": "roberta.encoder.layer.4.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33073152
}
],
"md5sum": "19336f0b44e4910392f92e00c89f024a"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33074688,
"records": [
{
"name": "roberta.encoder.layer.4.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "roberta.encoder.layer.5.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4718592
},
{
"name": "roberta.encoder.layer.5.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4720128
},
{
"name": "roberta.encoder.layer.5.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4721664
},
{
"name": "roberta.encoder.layer.5.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 4723200
},
{
"name": "roberta.encoder.layer.5.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 5902848
},
{
"name": "roberta.encoder.layer.5.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 5904384
},
{
"name": "roberta.encoder.layer.5.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 7084032
},
{
"name": "roberta.encoder.layer.5.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 7085568
},
{
"name": "roberta.encoder.layer.5.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 8265216
},
{
"name": "roberta.encoder.layer.5.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 8266752
},
{
"name": "roberta.encoder.layer.5.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9446400
},
{
"name": "roberta.encoder.layer.5.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9452544
},
{
"name": "roberta.encoder.layer.5.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14171136
},
{
"name": "roberta.encoder.layer.5.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14172672
},
{
"name": "roberta.encoder.layer.5.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14174208
},
{
"name": "roberta.encoder.layer.5.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 14175744
},
{
"name": "roberta.encoder.layer.6.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18894336
},
{
"name": "roberta.encoder.layer.6.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18895872
},
{
"name": "roberta.encoder.layer.6.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18897408
},
{
"name": "roberta.encoder.layer.6.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 18898944
},
{
"name": "roberta.encoder.layer.6.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 20078592
},
{
"name": "roberta.encoder.layer.6.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 20080128
},
{
"name": "roberta.encoder.layer.6.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 21259776
},
{
"name": "roberta.encoder.layer.6.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 21261312
},
{
"name": "roberta.encoder.layer.6.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22440960
},
{
"name": "roberta.encoder.layer.6.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 22442496
},
{
"name": "roberta.encoder.layer.6.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23622144
},
{
"name": "roberta.encoder.layer.6.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23628288
},
{
"name": "roberta.encoder.layer.6.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28346880
},
{
"name": "roberta.encoder.layer.6.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28348416
},
{
"name": "roberta.encoder.layer.6.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28349952
},
{
"name": "roberta.encoder.layer.6.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28351488
},
{
"name": "roberta.encoder.layer.7.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33070080
},
{
"name": "roberta.encoder.layer.7.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33071616
},
{
"name": "roberta.encoder.layer.7.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33073152
}
],
"md5sum": "00faacc7c0be83772391709c301afe7f"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33080832,
"records": [
{
"name": "roberta.encoder.layer.7.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 0
},
{
"name": "roberta.encoder.layer.7.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1179648
},
{
"name": "roberta.encoder.layer.7.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 1181184
},
{
"name": "roberta.encoder.layer.7.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 2360832
},
{
"name": "roberta.encoder.layer.7.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 2362368
},
{
"name": "roberta.encoder.layer.7.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 3542016
},
{
"name": "roberta.encoder.layer.7.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 3543552
},
{
"name": "roberta.encoder.layer.7.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 4723200
},
{
"name": "roberta.encoder.layer.7.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4729344
},
{
"name": "roberta.encoder.layer.7.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9447936
},
{
"name": "roberta.encoder.layer.7.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9449472
},
{
"name": "roberta.encoder.layer.7.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9451008
},
{
"name": "roberta.encoder.layer.7.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9452544
},
{
"name": "roberta.encoder.layer.8.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14171136
},
{
"name": "roberta.encoder.layer.8.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14172672
},
{
"name": "roberta.encoder.layer.8.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14174208
},
{
"name": "roberta.encoder.layer.8.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 14175744
},
{
"name": "roberta.encoder.layer.8.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 15355392
},
{
"name": "roberta.encoder.layer.8.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 15356928
},
{
"name": "roberta.encoder.layer.8.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 16536576
},
{
"name": "roberta.encoder.layer.8.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 16538112
},
{
"name": "roberta.encoder.layer.8.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 17717760
},
{
"name": "roberta.encoder.layer.8.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17719296
},
{
"name": "roberta.encoder.layer.8.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18898944
},
{
"name": "roberta.encoder.layer.8.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18905088
},
{
"name": "roberta.encoder.layer.8.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23623680
},
{
"name": "roberta.encoder.layer.8.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23625216
},
{
"name": "roberta.encoder.layer.8.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23626752
},
{
"name": "roberta.encoder.layer.8.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23628288
},
{
"name": "roberta.encoder.layer.9.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28346880
},
{
"name": "roberta.encoder.layer.9.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28348416
},
{
"name": "roberta.encoder.layer.9.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28349952
},
{
"name": "roberta.encoder.layer.9.attention.output.dense.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 28351488
},
{
"name": "roberta.encoder.layer.9.attention.self.key.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29531136
},
{
"name": "roberta.encoder.layer.9.attention.self.key.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 29532672
},
{
"name": "roberta.encoder.layer.9.attention.self.query.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30712320
},
{
"name": "roberta.encoder.layer.9.attention.self.query.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 30713856
},
{
"name": "roberta.encoder.layer.9.attention.self.value.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 31893504
},
{
"name": "roberta.encoder.layer.9.attention.self.value.weight",
"shape": [
768,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 31895040
},
{
"name": "roberta.encoder.layer.9.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33074688
}
],
"md5sum": "a7749b063182508241ed8bb93c14fd44"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 9441792,
"records": [
{
"name": "roberta.encoder.layer.9.intermediate.dense.weight",
"shape": [
3072,
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "roberta.encoder.layer.9.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4718592
},
{
"name": "roberta.encoder.layer.9.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4720128
},
{
"name": "roberta.encoder.layer.9.output.dense.bias",
"shape": [
768
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4721664
},
{
"name": "roberta.encoder.layer.9.output.dense.weight",
"shape": [
768,
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4723200
}
],
"md5sum": "97dba7c22f9d1c0510901a4a4c7fea86"
}
]
}