Charlie Ruan commited on
Commit
5892059
·
1 Parent(s): 2dd8bb7

Add weights

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "bert",
4
+ "quantization": "q0f32",
5
+ "model_config": {
6
+ "vocab_size": 30522,
7
+ "hidden_size": 384,
8
+ "num_hidden_layers": 12,
9
+ "num_attention_heads": 12,
10
+ "intermediate_size": 1536,
11
+ "hidden_act": "gelu",
12
+ "layer_norm_eps": 1e-12,
13
+ "context_window_size": 512,
14
+ "prefill_chunk_size": 512,
15
+ "tensor_parallel_shards": 1,
16
+ "head_dim": 32,
17
+ "max_batch_size": 80
18
+ },
19
+ "vocab_size": 30522,
20
+ "context_window_size": 512,
21
+ "sliding_window_size": -1,
22
+ "prefill_chunk_size": 512,
23
+ "attention_sink_size": -1,
24
+ "tensor_parallel_shards": 1,
25
+ "pipeline_parallel_stages": 1,
26
+ "temperature": 1.0,
27
+ "presence_penalty": 0.0,
28
+ "frequency_penalty": 0.0,
29
+ "repetition_penalty": 1.0,
30
+ "top_p": 1.0,
31
+ "tokenizer_files": [
32
+ "tokenizer.json",
33
+ "tokenizer_config.json"
34
+ ],
35
+ "tokenizer_info": {
36
+ "token_postproc_method": "byte_fallback",
37
+ "prepend_space_in_encode": false,
38
+ "strip_space_in_decode": false
39
+ },
40
+ "conv_template": {
41
+ "name": "LM",
42
+ "system_template": "{system_message}",
43
+ "system_message": "",
44
+ "system_prefix_token_ids": [
45
+ 1
46
+ ],
47
+ "add_role_after_system_message": true,
48
+ "roles": {
49
+ "user": "",
50
+ "assistant": ""
51
+ },
52
+ "role_templates": {
53
+ "user": "{user_message}",
54
+ "assistant": "{assistant_message}",
55
+ "tool": "{tool_message}"
56
+ },
57
+ "messages": [],
58
+ "seps": [
59
+ ""
60
+ ],
61
+ "role_content_sep": "",
62
+ "role_empty_sep": "",
63
+ "stop_str": [],
64
+ "stop_token_ids": [
65
+ 2
66
+ ],
67
+ "function_string": "",
68
+ "use_function_calling": false
69
+ },
70
+ "pad_token_id": 0,
71
+ "bos_token_id": 1,
72
+ "eos_token_id": 2
73
+ }
ndarray-cache-b16.json ADDED
@@ -0,0 +1,1566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 149,
4
+ "ParamBytes": 132848640.0,
5
+ "BitsPerParam": 32.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 33304320,
12
+ "records": [
13
+ {
14
+ "name": "embeddings.LayerNorm.bias",
15
+ "shape": [
16
+ 384
17
+ ],
18
+ "dtype": "bfloat16",
19
+ "format": "raw",
20
+ "nbytes": 768,
21
+ "byteOffset": 0
22
+ },
23
+ {
24
+ "name": "embeddings.LayerNorm.weight",
25
+ "shape": [
26
+ 384
27
+ ],
28
+ "dtype": "bfloat16",
29
+ "format": "raw",
30
+ "nbytes": 768,
31
+ "byteOffset": 768
32
+ },
33
+ {
34
+ "name": "embeddings.position_embeddings.weight",
35
+ "shape": [
36
+ 512,
37
+ 384
38
+ ],
39
+ "dtype": "bfloat16",
40
+ "format": "raw",
41
+ "nbytes": 393216,
42
+ "byteOffset": 1536
43
+ },
44
+ {
45
+ "name": "embeddings.token_type_embeddings.weight",
46
+ "shape": [
47
+ 2,
48
+ 384
49
+ ],
50
+ "dtype": "bfloat16",
51
+ "format": "raw",
52
+ "nbytes": 1536,
53
+ "byteOffset": 394752
54
+ },
55
+ {
56
+ "name": "embeddings.word_embeddings.weight",
57
+ "shape": [
58
+ 30522,
59
+ 384
60
+ ],
61
+ "dtype": "bfloat16",
62
+ "format": "raw",
63
+ "nbytes": 23440896,
64
+ "byteOffset": 396288
65
+ },
66
+ {
67
+ "name": "encoder.layer.0.attention.output.LayerNorm.bias",
68
+ "shape": [
69
+ 384
70
+ ],
71
+ "dtype": "bfloat16",
72
+ "format": "raw",
73
+ "nbytes": 768,
74
+ "byteOffset": 23837184
75
+ },
76
+ {
77
+ "name": "encoder.layer.0.attention.output.LayerNorm.weight",
78
+ "shape": [
79
+ 384
80
+ ],
81
+ "dtype": "bfloat16",
82
+ "format": "raw",
83
+ "nbytes": 768,
84
+ "byteOffset": 23837952
85
+ },
86
+ {
87
+ "name": "encoder.layer.0.attention.output.dense.bias",
88
+ "shape": [
89
+ 384
90
+ ],
91
+ "dtype": "bfloat16",
92
+ "format": "raw",
93
+ "nbytes": 768,
94
+ "byteOffset": 23838720
95
+ },
96
+ {
97
+ "name": "encoder.layer.0.attention.output.dense.weight",
98
+ "shape": [
99
+ 384,
100
+ 384
101
+ ],
102
+ "dtype": "bfloat16",
103
+ "format": "raw",
104
+ "nbytes": 294912,
105
+ "byteOffset": 23839488
106
+ },
107
+ {
108
+ "name": "encoder.layer.0.attention.self.qkv.bias",
109
+ "shape": [
110
+ 1152
111
+ ],
112
+ "dtype": "bfloat16",
113
+ "format": "raw",
114
+ "nbytes": 2304,
115
+ "byteOffset": 24134400
116
+ },
117
+ {
118
+ "name": "encoder.layer.0.attention.self.qkv.weight",
119
+ "shape": [
120
+ 1152,
121
+ 384
122
+ ],
123
+ "dtype": "bfloat16",
124
+ "format": "raw",
125
+ "nbytes": 884736,
126
+ "byteOffset": 24136704
127
+ },
128
+ {
129
+ "name": "encoder.layer.0.intermediate.dense.bias",
130
+ "shape": [
131
+ 1536
132
+ ],
133
+ "dtype": "bfloat16",
134
+ "format": "raw",
135
+ "nbytes": 3072,
136
+ "byteOffset": 25021440
137
+ },
138
+ {
139
+ "name": "encoder.layer.0.intermediate.dense.weight",
140
+ "shape": [
141
+ 1536,
142
+ 384
143
+ ],
144
+ "dtype": "bfloat16",
145
+ "format": "raw",
146
+ "nbytes": 1179648,
147
+ "byteOffset": 25024512
148
+ },
149
+ {
150
+ "name": "encoder.layer.0.output.LayerNorm.bias",
151
+ "shape": [
152
+ 384
153
+ ],
154
+ "dtype": "bfloat16",
155
+ "format": "raw",
156
+ "nbytes": 768,
157
+ "byteOffset": 26204160
158
+ },
159
+ {
160
+ "name": "encoder.layer.0.output.LayerNorm.weight",
161
+ "shape": [
162
+ 384
163
+ ],
164
+ "dtype": "bfloat16",
165
+ "format": "raw",
166
+ "nbytes": 768,
167
+ "byteOffset": 26204928
168
+ },
169
+ {
170
+ "name": "encoder.layer.0.output.dense.bias",
171
+ "shape": [
172
+ 384
173
+ ],
174
+ "dtype": "bfloat16",
175
+ "format": "raw",
176
+ "nbytes": 768,
177
+ "byteOffset": 26205696
178
+ },
179
+ {
180
+ "name": "encoder.layer.0.output.dense.weight",
181
+ "shape": [
182
+ 384,
183
+ 1536
184
+ ],
185
+ "dtype": "bfloat16",
186
+ "format": "raw",
187
+ "nbytes": 1179648,
188
+ "byteOffset": 26206464
189
+ },
190
+ {
191
+ "name": "encoder.layer.1.attention.output.LayerNorm.bias",
192
+ "shape": [
193
+ 384
194
+ ],
195
+ "dtype": "bfloat16",
196
+ "format": "raw",
197
+ "nbytes": 768,
198
+ "byteOffset": 27386112
199
+ },
200
+ {
201
+ "name": "encoder.layer.1.attention.output.LayerNorm.weight",
202
+ "shape": [
203
+ 384
204
+ ],
205
+ "dtype": "bfloat16",
206
+ "format": "raw",
207
+ "nbytes": 768,
208
+ "byteOffset": 27386880
209
+ },
210
+ {
211
+ "name": "encoder.layer.1.attention.output.dense.bias",
212
+ "shape": [
213
+ 384
214
+ ],
215
+ "dtype": "bfloat16",
216
+ "format": "raw",
217
+ "nbytes": 768,
218
+ "byteOffset": 27387648
219
+ },
220
+ {
221
+ "name": "encoder.layer.1.attention.output.dense.weight",
222
+ "shape": [
223
+ 384,
224
+ 384
225
+ ],
226
+ "dtype": "bfloat16",
227
+ "format": "raw",
228
+ "nbytes": 294912,
229
+ "byteOffset": 27388416
230
+ },
231
+ {
232
+ "name": "encoder.layer.1.attention.self.qkv.bias",
233
+ "shape": [
234
+ 1152
235
+ ],
236
+ "dtype": "bfloat16",
237
+ "format": "raw",
238
+ "nbytes": 2304,
239
+ "byteOffset": 27683328
240
+ },
241
+ {
242
+ "name": "encoder.layer.1.attention.self.qkv.weight",
243
+ "shape": [
244
+ 1152,
245
+ 384
246
+ ],
247
+ "dtype": "bfloat16",
248
+ "format": "raw",
249
+ "nbytes": 884736,
250
+ "byteOffset": 27685632
251
+ },
252
+ {
253
+ "name": "encoder.layer.1.intermediate.dense.bias",
254
+ "shape": [
255
+ 1536
256
+ ],
257
+ "dtype": "bfloat16",
258
+ "format": "raw",
259
+ "nbytes": 3072,
260
+ "byteOffset": 28570368
261
+ },
262
+ {
263
+ "name": "encoder.layer.1.intermediate.dense.weight",
264
+ "shape": [
265
+ 1536,
266
+ 384
267
+ ],
268
+ "dtype": "bfloat16",
269
+ "format": "raw",
270
+ "nbytes": 1179648,
271
+ "byteOffset": 28573440
272
+ },
273
+ {
274
+ "name": "encoder.layer.1.output.LayerNorm.bias",
275
+ "shape": [
276
+ 384
277
+ ],
278
+ "dtype": "bfloat16",
279
+ "format": "raw",
280
+ "nbytes": 768,
281
+ "byteOffset": 29753088
282
+ },
283
+ {
284
+ "name": "encoder.layer.1.output.LayerNorm.weight",
285
+ "shape": [
286
+ 384
287
+ ],
288
+ "dtype": "bfloat16",
289
+ "format": "raw",
290
+ "nbytes": 768,
291
+ "byteOffset": 29753856
292
+ },
293
+ {
294
+ "name": "encoder.layer.1.output.dense.bias",
295
+ "shape": [
296
+ 384
297
+ ],
298
+ "dtype": "bfloat16",
299
+ "format": "raw",
300
+ "nbytes": 768,
301
+ "byteOffset": 29754624
302
+ },
303
+ {
304
+ "name": "encoder.layer.1.output.dense.weight",
305
+ "shape": [
306
+ 384,
307
+ 1536
308
+ ],
309
+ "dtype": "bfloat16",
310
+ "format": "raw",
311
+ "nbytes": 1179648,
312
+ "byteOffset": 29755392
313
+ },
314
+ {
315
+ "name": "encoder.layer.10.attention.output.LayerNorm.bias",
316
+ "shape": [
317
+ 384
318
+ ],
319
+ "dtype": "bfloat16",
320
+ "format": "raw",
321
+ "nbytes": 768,
322
+ "byteOffset": 30935040
323
+ },
324
+ {
325
+ "name": "encoder.layer.10.attention.output.LayerNorm.weight",
326
+ "shape": [
327
+ 384
328
+ ],
329
+ "dtype": "bfloat16",
330
+ "format": "raw",
331
+ "nbytes": 768,
332
+ "byteOffset": 30935808
333
+ },
334
+ {
335
+ "name": "encoder.layer.10.attention.output.dense.bias",
336
+ "shape": [
337
+ 384
338
+ ],
339
+ "dtype": "bfloat16",
340
+ "format": "raw",
341
+ "nbytes": 768,
342
+ "byteOffset": 30936576
343
+ },
344
+ {
345
+ "name": "encoder.layer.10.attention.output.dense.weight",
346
+ "shape": [
347
+ 384,
348
+ 384
349
+ ],
350
+ "dtype": "bfloat16",
351
+ "format": "raw",
352
+ "nbytes": 294912,
353
+ "byteOffset": 30937344
354
+ },
355
+ {
356
+ "name": "encoder.layer.10.attention.self.qkv.bias",
357
+ "shape": [
358
+ 1152
359
+ ],
360
+ "dtype": "bfloat16",
361
+ "format": "raw",
362
+ "nbytes": 2304,
363
+ "byteOffset": 31232256
364
+ },
365
+ {
366
+ "name": "encoder.layer.10.attention.self.qkv.weight",
367
+ "shape": [
368
+ 1152,
369
+ 384
370
+ ],
371
+ "dtype": "bfloat16",
372
+ "format": "raw",
373
+ "nbytes": 884736,
374
+ "byteOffset": 31234560
375
+ },
376
+ {
377
+ "name": "encoder.layer.10.intermediate.dense.bias",
378
+ "shape": [
379
+ 1536
380
+ ],
381
+ "dtype": "bfloat16",
382
+ "format": "raw",
383
+ "nbytes": 3072,
384
+ "byteOffset": 32119296
385
+ },
386
+ {
387
+ "name": "encoder.layer.10.intermediate.dense.weight",
388
+ "shape": [
389
+ 1536,
390
+ 384
391
+ ],
392
+ "dtype": "bfloat16",
393
+ "format": "raw",
394
+ "nbytes": 1179648,
395
+ "byteOffset": 32122368
396
+ },
397
+ {
398
+ "name": "encoder.layer.10.output.LayerNorm.bias",
399
+ "shape": [
400
+ 384
401
+ ],
402
+ "dtype": "bfloat16",
403
+ "format": "raw",
404
+ "nbytes": 768,
405
+ "byteOffset": 33302016
406
+ },
407
+ {
408
+ "name": "encoder.layer.10.output.LayerNorm.weight",
409
+ "shape": [
410
+ 384
411
+ ],
412
+ "dtype": "bfloat16",
413
+ "format": "raw",
414
+ "nbytes": 768,
415
+ "byteOffset": 33302784
416
+ },
417
+ {
418
+ "name": "encoder.layer.10.output.dense.bias",
419
+ "shape": [
420
+ 384
421
+ ],
422
+ "dtype": "bfloat16",
423
+ "format": "raw",
424
+ "nbytes": 768,
425
+ "byteOffset": 33303552
426
+ }
427
+ ],
428
+ "md5sum": "12f41dfd857e82988a77aaa528e2af9f"
429
+ },
430
+ {
431
+ "dataPath": "params_shard_1.bin",
432
+ "format": "raw-shard",
433
+ "nbytes": 33120000,
434
+ "records": [
435
+ {
436
+ "name": "encoder.layer.10.output.dense.weight",
437
+ "shape": [
438
+ 384,
439
+ 1536
440
+ ],
441
+ "dtype": "bfloat16",
442
+ "format": "raw",
443
+ "nbytes": 1179648,
444
+ "byteOffset": 0
445
+ },
446
+ {
447
+ "name": "encoder.layer.11.attention.output.LayerNorm.bias",
448
+ "shape": [
449
+ 384
450
+ ],
451
+ "dtype": "bfloat16",
452
+ "format": "raw",
453
+ "nbytes": 768,
454
+ "byteOffset": 1179648
455
+ },
456
+ {
457
+ "name": "encoder.layer.11.attention.output.LayerNorm.weight",
458
+ "shape": [
459
+ 384
460
+ ],
461
+ "dtype": "bfloat16",
462
+ "format": "raw",
463
+ "nbytes": 768,
464
+ "byteOffset": 1180416
465
+ },
466
+ {
467
+ "name": "encoder.layer.11.attention.output.dense.bias",
468
+ "shape": [
469
+ 384
470
+ ],
471
+ "dtype": "bfloat16",
472
+ "format": "raw",
473
+ "nbytes": 768,
474
+ "byteOffset": 1181184
475
+ },
476
+ {
477
+ "name": "encoder.layer.11.attention.output.dense.weight",
478
+ "shape": [
479
+ 384,
480
+ 384
481
+ ],
482
+ "dtype": "bfloat16",
483
+ "format": "raw",
484
+ "nbytes": 294912,
485
+ "byteOffset": 1181952
486
+ },
487
+ {
488
+ "name": "encoder.layer.11.attention.self.qkv.bias",
489
+ "shape": [
490
+ 1152
491
+ ],
492
+ "dtype": "bfloat16",
493
+ "format": "raw",
494
+ "nbytes": 2304,
495
+ "byteOffset": 1476864
496
+ },
497
+ {
498
+ "name": "encoder.layer.11.attention.self.qkv.weight",
499
+ "shape": [
500
+ 1152,
501
+ 384
502
+ ],
503
+ "dtype": "bfloat16",
504
+ "format": "raw",
505
+ "nbytes": 884736,
506
+ "byteOffset": 1479168
507
+ },
508
+ {
509
+ "name": "encoder.layer.11.intermediate.dense.bias",
510
+ "shape": [
511
+ 1536
512
+ ],
513
+ "dtype": "bfloat16",
514
+ "format": "raw",
515
+ "nbytes": 3072,
516
+ "byteOffset": 2363904
517
+ },
518
+ {
519
+ "name": "encoder.layer.11.intermediate.dense.weight",
520
+ "shape": [
521
+ 1536,
522
+ 384
523
+ ],
524
+ "dtype": "bfloat16",
525
+ "format": "raw",
526
+ "nbytes": 1179648,
527
+ "byteOffset": 2366976
528
+ },
529
+ {
530
+ "name": "encoder.layer.11.output.LayerNorm.bias",
531
+ "shape": [
532
+ 384
533
+ ],
534
+ "dtype": "bfloat16",
535
+ "format": "raw",
536
+ "nbytes": 768,
537
+ "byteOffset": 3546624
538
+ },
539
+ {
540
+ "name": "encoder.layer.11.output.LayerNorm.weight",
541
+ "shape": [
542
+ 384
543
+ ],
544
+ "dtype": "bfloat16",
545
+ "format": "raw",
546
+ "nbytes": 768,
547
+ "byteOffset": 3547392
548
+ },
549
+ {
550
+ "name": "encoder.layer.11.output.dense.bias",
551
+ "shape": [
552
+ 384
553
+ ],
554
+ "dtype": "bfloat16",
555
+ "format": "raw",
556
+ "nbytes": 768,
557
+ "byteOffset": 3548160
558
+ },
559
+ {
560
+ "name": "encoder.layer.11.output.dense.weight",
561
+ "shape": [
562
+ 384,
563
+ 1536
564
+ ],
565
+ "dtype": "bfloat16",
566
+ "format": "raw",
567
+ "nbytes": 1179648,
568
+ "byteOffset": 3548928
569
+ },
570
+ {
571
+ "name": "encoder.layer.2.attention.output.LayerNorm.bias",
572
+ "shape": [
573
+ 384
574
+ ],
575
+ "dtype": "bfloat16",
576
+ "format": "raw",
577
+ "nbytes": 768,
578
+ "byteOffset": 4728576
579
+ },
580
+ {
581
+ "name": "encoder.layer.2.attention.output.LayerNorm.weight",
582
+ "shape": [
583
+ 384
584
+ ],
585
+ "dtype": "bfloat16",
586
+ "format": "raw",
587
+ "nbytes": 768,
588
+ "byteOffset": 4729344
589
+ },
590
+ {
591
+ "name": "encoder.layer.2.attention.output.dense.bias",
592
+ "shape": [
593
+ 384
594
+ ],
595
+ "dtype": "bfloat16",
596
+ "format": "raw",
597
+ "nbytes": 768,
598
+ "byteOffset": 4730112
599
+ },
600
+ {
601
+ "name": "encoder.layer.2.attention.output.dense.weight",
602
+ "shape": [
603
+ 384,
604
+ 384
605
+ ],
606
+ "dtype": "bfloat16",
607
+ "format": "raw",
608
+ "nbytes": 294912,
609
+ "byteOffset": 4730880
610
+ },
611
+ {
612
+ "name": "encoder.layer.2.attention.self.qkv.bias",
613
+ "shape": [
614
+ 1152
615
+ ],
616
+ "dtype": "bfloat16",
617
+ "format": "raw",
618
+ "nbytes": 2304,
619
+ "byteOffset": 5025792
620
+ },
621
+ {
622
+ "name": "encoder.layer.2.attention.self.qkv.weight",
623
+ "shape": [
624
+ 1152,
625
+ 384
626
+ ],
627
+ "dtype": "bfloat16",
628
+ "format": "raw",
629
+ "nbytes": 884736,
630
+ "byteOffset": 5028096
631
+ },
632
+ {
633
+ "name": "encoder.layer.2.intermediate.dense.bias",
634
+ "shape": [
635
+ 1536
636
+ ],
637
+ "dtype": "bfloat16",
638
+ "format": "raw",
639
+ "nbytes": 3072,
640
+ "byteOffset": 5912832
641
+ },
642
+ {
643
+ "name": "encoder.layer.2.intermediate.dense.weight",
644
+ "shape": [
645
+ 1536,
646
+ 384
647
+ ],
648
+ "dtype": "bfloat16",
649
+ "format": "raw",
650
+ "nbytes": 1179648,
651
+ "byteOffset": 5915904
652
+ },
653
+ {
654
+ "name": "encoder.layer.2.output.LayerNorm.bias",
655
+ "shape": [
656
+ 384
657
+ ],
658
+ "dtype": "bfloat16",
659
+ "format": "raw",
660
+ "nbytes": 768,
661
+ "byteOffset": 7095552
662
+ },
663
+ {
664
+ "name": "encoder.layer.2.output.LayerNorm.weight",
665
+ "shape": [
666
+ 384
667
+ ],
668
+ "dtype": "bfloat16",
669
+ "format": "raw",
670
+ "nbytes": 768,
671
+ "byteOffset": 7096320
672
+ },
673
+ {
674
+ "name": "encoder.layer.2.output.dense.bias",
675
+ "shape": [
676
+ 384
677
+ ],
678
+ "dtype": "bfloat16",
679
+ "format": "raw",
680
+ "nbytes": 768,
681
+ "byteOffset": 7097088
682
+ },
683
+ {
684
+ "name": "encoder.layer.2.output.dense.weight",
685
+ "shape": [
686
+ 384,
687
+ 1536
688
+ ],
689
+ "dtype": "bfloat16",
690
+ "format": "raw",
691
+ "nbytes": 1179648,
692
+ "byteOffset": 7097856
693
+ },
694
+ {
695
+ "name": "encoder.layer.3.attention.output.LayerNorm.bias",
696
+ "shape": [
697
+ 384
698
+ ],
699
+ "dtype": "bfloat16",
700
+ "format": "raw",
701
+ "nbytes": 768,
702
+ "byteOffset": 8277504
703
+ },
704
+ {
705
+ "name": "encoder.layer.3.attention.output.LayerNorm.weight",
706
+ "shape": [
707
+ 384
708
+ ],
709
+ "dtype": "bfloat16",
710
+ "format": "raw",
711
+ "nbytes": 768,
712
+ "byteOffset": 8278272
713
+ },
714
+ {
715
+ "name": "encoder.layer.3.attention.output.dense.bias",
716
+ "shape": [
717
+ 384
718
+ ],
719
+ "dtype": "bfloat16",
720
+ "format": "raw",
721
+ "nbytes": 768,
722
+ "byteOffset": 8279040
723
+ },
724
+ {
725
+ "name": "encoder.layer.3.attention.output.dense.weight",
726
+ "shape": [
727
+ 384,
728
+ 384
729
+ ],
730
+ "dtype": "bfloat16",
731
+ "format": "raw",
732
+ "nbytes": 294912,
733
+ "byteOffset": 8279808
734
+ },
735
+ {
736
+ "name": "encoder.layer.3.attention.self.qkv.bias",
737
+ "shape": [
738
+ 1152
739
+ ],
740
+ "dtype": "bfloat16",
741
+ "format": "raw",
742
+ "nbytes": 2304,
743
+ "byteOffset": 8574720
744
+ },
745
+ {
746
+ "name": "encoder.layer.3.attention.self.qkv.weight",
747
+ "shape": [
748
+ 1152,
749
+ 384
750
+ ],
751
+ "dtype": "bfloat16",
752
+ "format": "raw",
753
+ "nbytes": 884736,
754
+ "byteOffset": 8577024
755
+ },
756
+ {
757
+ "name": "encoder.layer.3.intermediate.dense.bias",
758
+ "shape": [
759
+ 1536
760
+ ],
761
+ "dtype": "bfloat16",
762
+ "format": "raw",
763
+ "nbytes": 3072,
764
+ "byteOffset": 9461760
765
+ },
766
+ {
767
+ "name": "encoder.layer.3.intermediate.dense.weight",
768
+ "shape": [
769
+ 1536,
770
+ 384
771
+ ],
772
+ "dtype": "bfloat16",
773
+ "format": "raw",
774
+ "nbytes": 1179648,
775
+ "byteOffset": 9464832
776
+ },
777
+ {
778
+ "name": "encoder.layer.3.output.LayerNorm.bias",
779
+ "shape": [
780
+ 384
781
+ ],
782
+ "dtype": "bfloat16",
783
+ "format": "raw",
784
+ "nbytes": 768,
785
+ "byteOffset": 10644480
786
+ },
787
+ {
788
+ "name": "encoder.layer.3.output.LayerNorm.weight",
789
+ "shape": [
790
+ 384
791
+ ],
792
+ "dtype": "bfloat16",
793
+ "format": "raw",
794
+ "nbytes": 768,
795
+ "byteOffset": 10645248
796
+ },
797
+ {
798
+ "name": "encoder.layer.3.output.dense.bias",
799
+ "shape": [
800
+ 384
801
+ ],
802
+ "dtype": "bfloat16",
803
+ "format": "raw",
804
+ "nbytes": 768,
805
+ "byteOffset": 10646016
806
+ },
807
+ {
808
+ "name": "encoder.layer.3.output.dense.weight",
809
+ "shape": [
810
+ 384,
811
+ 1536
812
+ ],
813
+ "dtype": "bfloat16",
814
+ "format": "raw",
815
+ "nbytes": 1179648,
816
+ "byteOffset": 10646784
817
+ },
818
+ {
819
+ "name": "encoder.layer.4.attention.output.LayerNorm.bias",
820
+ "shape": [
821
+ 384
822
+ ],
823
+ "dtype": "bfloat16",
824
+ "format": "raw",
825
+ "nbytes": 768,
826
+ "byteOffset": 11826432
827
+ },
828
+ {
829
+ "name": "encoder.layer.4.attention.output.LayerNorm.weight",
830
+ "shape": [
831
+ 384
832
+ ],
833
+ "dtype": "bfloat16",
834
+ "format": "raw",
835
+ "nbytes": 768,
836
+ "byteOffset": 11827200
837
+ },
838
+ {
839
+ "name": "encoder.layer.4.attention.output.dense.bias",
840
+ "shape": [
841
+ 384
842
+ ],
843
+ "dtype": "bfloat16",
844
+ "format": "raw",
845
+ "nbytes": 768,
846
+ "byteOffset": 11827968
847
+ },
848
+ {
849
+ "name": "encoder.layer.4.attention.output.dense.weight",
850
+ "shape": [
851
+ 384,
852
+ 384
853
+ ],
854
+ "dtype": "bfloat16",
855
+ "format": "raw",
856
+ "nbytes": 294912,
857
+ "byteOffset": 11828736
858
+ },
859
+ {
860
+ "name": "encoder.layer.4.attention.self.qkv.bias",
861
+ "shape": [
862
+ 1152
863
+ ],
864
+ "dtype": "bfloat16",
865
+ "format": "raw",
866
+ "nbytes": 2304,
867
+ "byteOffset": 12123648
868
+ },
869
+ {
870
+ "name": "encoder.layer.4.attention.self.qkv.weight",
871
+ "shape": [
872
+ 1152,
873
+ 384
874
+ ],
875
+ "dtype": "bfloat16",
876
+ "format": "raw",
877
+ "nbytes": 884736,
878
+ "byteOffset": 12125952
879
+ },
880
+ {
881
+ "name": "encoder.layer.4.intermediate.dense.bias",
882
+ "shape": [
883
+ 1536
884
+ ],
885
+ "dtype": "bfloat16",
886
+ "format": "raw",
887
+ "nbytes": 3072,
888
+ "byteOffset": 13010688
889
+ },
890
+ {
891
+ "name": "encoder.layer.4.intermediate.dense.weight",
892
+ "shape": [
893
+ 1536,
894
+ 384
895
+ ],
896
+ "dtype": "bfloat16",
897
+ "format": "raw",
898
+ "nbytes": 1179648,
899
+ "byteOffset": 13013760
900
+ },
901
+ {
902
+ "name": "encoder.layer.4.output.LayerNorm.bias",
903
+ "shape": [
904
+ 384
905
+ ],
906
+ "dtype": "bfloat16",
907
+ "format": "raw",
908
+ "nbytes": 768,
909
+ "byteOffset": 14193408
910
+ },
911
+ {
912
+ "name": "encoder.layer.4.output.LayerNorm.weight",
913
+ "shape": [
914
+ 384
915
+ ],
916
+ "dtype": "bfloat16",
917
+ "format": "raw",
918
+ "nbytes": 768,
919
+ "byteOffset": 14194176
920
+ },
921
+ {
922
+ "name": "encoder.layer.4.output.dense.bias",
923
+ "shape": [
924
+ 384
925
+ ],
926
+ "dtype": "bfloat16",
927
+ "format": "raw",
928
+ "nbytes": 768,
929
+ "byteOffset": 14194944
930
+ },
931
+ {
932
+ "name": "encoder.layer.4.output.dense.weight",
933
+ "shape": [
934
+ 384,
935
+ 1536
936
+ ],
937
+ "dtype": "bfloat16",
938
+ "format": "raw",
939
+ "nbytes": 1179648,
940
+ "byteOffset": 14195712
941
+ },
942
+ {
943
+ "name": "encoder.layer.5.attention.output.LayerNorm.bias",
944
+ "shape": [
945
+ 384
946
+ ],
947
+ "dtype": "bfloat16",
948
+ "format": "raw",
949
+ "nbytes": 768,
950
+ "byteOffset": 15375360
951
+ },
952
+ {
953
+ "name": "encoder.layer.5.attention.output.LayerNorm.weight",
954
+ "shape": [
955
+ 384
956
+ ],
957
+ "dtype": "bfloat16",
958
+ "format": "raw",
959
+ "nbytes": 768,
960
+ "byteOffset": 15376128
961
+ },
962
+ {
963
+ "name": "encoder.layer.5.attention.output.dense.bias",
964
+ "shape": [
965
+ 384
966
+ ],
967
+ "dtype": "bfloat16",
968
+ "format": "raw",
969
+ "nbytes": 768,
970
+ "byteOffset": 15376896
971
+ },
972
+ {
973
+ "name": "encoder.layer.5.attention.output.dense.weight",
974
+ "shape": [
975
+ 384,
976
+ 384
977
+ ],
978
+ "dtype": "bfloat16",
979
+ "format": "raw",
980
+ "nbytes": 294912,
981
+ "byteOffset": 15377664
982
+ },
983
+ {
984
+ "name": "encoder.layer.5.attention.self.qkv.bias",
985
+ "shape": [
986
+ 1152
987
+ ],
988
+ "dtype": "bfloat16",
989
+ "format": "raw",
990
+ "nbytes": 2304,
991
+ "byteOffset": 15672576
992
+ },
993
+ {
994
+ "name": "encoder.layer.5.attention.self.qkv.weight",
995
+ "shape": [
996
+ 1152,
997
+ 384
998
+ ],
999
+ "dtype": "bfloat16",
1000
+ "format": "raw",
1001
+ "nbytes": 884736,
1002
+ "byteOffset": 15674880
1003
+ },
1004
+ {
1005
+ "name": "encoder.layer.5.intermediate.dense.bias",
1006
+ "shape": [
1007
+ 1536
1008
+ ],
1009
+ "dtype": "bfloat16",
1010
+ "format": "raw",
1011
+ "nbytes": 3072,
1012
+ "byteOffset": 16559616
1013
+ },
1014
+ {
1015
+ "name": "encoder.layer.5.intermediate.dense.weight",
1016
+ "shape": [
1017
+ 1536,
1018
+ 384
1019
+ ],
1020
+ "dtype": "bfloat16",
1021
+ "format": "raw",
1022
+ "nbytes": 1179648,
1023
+ "byteOffset": 16562688
1024
+ },
1025
+ {
1026
+ "name": "encoder.layer.5.output.LayerNorm.bias",
1027
+ "shape": [
1028
+ 384
1029
+ ],
1030
+ "dtype": "bfloat16",
1031
+ "format": "raw",
1032
+ "nbytes": 768,
1033
+ "byteOffset": 17742336
1034
+ },
1035
+ {
1036
+ "name": "encoder.layer.5.output.LayerNorm.weight",
1037
+ "shape": [
1038
+ 384
1039
+ ],
1040
+ "dtype": "bfloat16",
1041
+ "format": "raw",
1042
+ "nbytes": 768,
1043
+ "byteOffset": 17743104
1044
+ },
1045
+ {
1046
+ "name": "encoder.layer.5.output.dense.bias",
1047
+ "shape": [
1048
+ 384
1049
+ ],
1050
+ "dtype": "bfloat16",
1051
+ "format": "raw",
1052
+ "nbytes": 768,
1053
+ "byteOffset": 17743872
1054
+ },
1055
+ {
1056
+ "name": "encoder.layer.5.output.dense.weight",
1057
+ "shape": [
1058
+ 384,
1059
+ 1536
1060
+ ],
1061
+ "dtype": "bfloat16",
1062
+ "format": "raw",
1063
+ "nbytes": 1179648,
1064
+ "byteOffset": 17744640
1065
+ },
1066
+ {
1067
+ "name": "encoder.layer.6.attention.output.LayerNorm.bias",
1068
+ "shape": [
1069
+ 384
1070
+ ],
1071
+ "dtype": "bfloat16",
1072
+ "format": "raw",
1073
+ "nbytes": 768,
1074
+ "byteOffset": 18924288
1075
+ },
1076
+ {
1077
+ "name": "encoder.layer.6.attention.output.LayerNorm.weight",
1078
+ "shape": [
1079
+ 384
1080
+ ],
1081
+ "dtype": "bfloat16",
1082
+ "format": "raw",
1083
+ "nbytes": 768,
1084
+ "byteOffset": 18925056
1085
+ },
1086
+ {
1087
+ "name": "encoder.layer.6.attention.output.dense.bias",
1088
+ "shape": [
1089
+ 384
1090
+ ],
1091
+ "dtype": "bfloat16",
1092
+ "format": "raw",
1093
+ "nbytes": 768,
1094
+ "byteOffset": 18925824
1095
+ },
1096
+ {
1097
+ "name": "encoder.layer.6.attention.output.dense.weight",
1098
+ "shape": [
1099
+ 384,
1100
+ 384
1101
+ ],
1102
+ "dtype": "bfloat16",
1103
+ "format": "raw",
1104
+ "nbytes": 294912,
1105
+ "byteOffset": 18926592
1106
+ },
1107
+ {
1108
+ "name": "encoder.layer.6.attention.self.qkv.bias",
1109
+ "shape": [
1110
+ 1152
1111
+ ],
1112
+ "dtype": "bfloat16",
1113
+ "format": "raw",
1114
+ "nbytes": 2304,
1115
+ "byteOffset": 19221504
1116
+ },
1117
+ {
1118
+ "name": "encoder.layer.6.attention.self.qkv.weight",
1119
+ "shape": [
1120
+ 1152,
1121
+ 384
1122
+ ],
1123
+ "dtype": "bfloat16",
1124
+ "format": "raw",
1125
+ "nbytes": 884736,
1126
+ "byteOffset": 19223808
1127
+ },
1128
+ {
1129
+ "name": "encoder.layer.6.intermediate.dense.bias",
1130
+ "shape": [
1131
+ 1536
1132
+ ],
1133
+ "dtype": "bfloat16",
1134
+ "format": "raw",
1135
+ "nbytes": 3072,
1136
+ "byteOffset": 20108544
1137
+ },
1138
+ {
1139
+ "name": "encoder.layer.6.intermediate.dense.weight",
1140
+ "shape": [
1141
+ 1536,
1142
+ 384
1143
+ ],
1144
+ "dtype": "bfloat16",
1145
+ "format": "raw",
1146
+ "nbytes": 1179648,
1147
+ "byteOffset": 20111616
1148
+ },
1149
+ {
1150
+ "name": "encoder.layer.6.output.LayerNorm.bias",
1151
+ "shape": [
1152
+ 384
1153
+ ],
1154
+ "dtype": "bfloat16",
1155
+ "format": "raw",
1156
+ "nbytes": 768,
1157
+ "byteOffset": 21291264
1158
+ },
1159
+ {
1160
+ "name": "encoder.layer.6.output.LayerNorm.weight",
1161
+ "shape": [
1162
+ 384
1163
+ ],
1164
+ "dtype": "bfloat16",
1165
+ "format": "raw",
1166
+ "nbytes": 768,
1167
+ "byteOffset": 21292032
1168
+ },
1169
+ {
1170
+ "name": "encoder.layer.6.output.dense.bias",
1171
+ "shape": [
1172
+ 384
1173
+ ],
1174
+ "dtype": "bfloat16",
1175
+ "format": "raw",
1176
+ "nbytes": 768,
1177
+ "byteOffset": 21292800
1178
+ },
1179
+ {
1180
+ "name": "encoder.layer.6.output.dense.weight",
1181
+ "shape": [
1182
+ 384,
1183
+ 1536
1184
+ ],
1185
+ "dtype": "bfloat16",
1186
+ "format": "raw",
1187
+ "nbytes": 1179648,
1188
+ "byteOffset": 21293568
1189
+ },
1190
+ {
1191
+ "name": "encoder.layer.7.attention.output.LayerNorm.bias",
1192
+ "shape": [
1193
+ 384
1194
+ ],
1195
+ "dtype": "bfloat16",
1196
+ "format": "raw",
1197
+ "nbytes": 768,
1198
+ "byteOffset": 22473216
1199
+ },
1200
+ {
1201
+ "name": "encoder.layer.7.attention.output.LayerNorm.weight",
1202
+ "shape": [
1203
+ 384
1204
+ ],
1205
+ "dtype": "bfloat16",
1206
+ "format": "raw",
1207
+ "nbytes": 768,
1208
+ "byteOffset": 22473984
1209
+ },
1210
+ {
1211
+ "name": "encoder.layer.7.attention.output.dense.bias",
1212
+ "shape": [
1213
+ 384
1214
+ ],
1215
+ "dtype": "bfloat16",
1216
+ "format": "raw",
1217
+ "nbytes": 768,
1218
+ "byteOffset": 22474752
1219
+ },
1220
+ {
1221
+ "name": "encoder.layer.7.attention.output.dense.weight",
1222
+ "shape": [
1223
+ 384,
1224
+ 384
1225
+ ],
1226
+ "dtype": "bfloat16",
1227
+ "format": "raw",
1228
+ "nbytes": 294912,
1229
+ "byteOffset": 22475520
1230
+ },
1231
+ {
1232
+ "name": "encoder.layer.7.attention.self.qkv.bias",
1233
+ "shape": [
1234
+ 1152
1235
+ ],
1236
+ "dtype": "bfloat16",
1237
+ "format": "raw",
1238
+ "nbytes": 2304,
1239
+ "byteOffset": 22770432
1240
+ },
1241
+ {
1242
+ "name": "encoder.layer.7.attention.self.qkv.weight",
1243
+ "shape": [
1244
+ 1152,
1245
+ 384
1246
+ ],
1247
+ "dtype": "bfloat16",
1248
+ "format": "raw",
1249
+ "nbytes": 884736,
1250
+ "byteOffset": 22772736
1251
+ },
1252
+ {
1253
+ "name": "encoder.layer.7.intermediate.dense.bias",
1254
+ "shape": [
1255
+ 1536
1256
+ ],
1257
+ "dtype": "bfloat16",
1258
+ "format": "raw",
1259
+ "nbytes": 3072,
1260
+ "byteOffset": 23657472
1261
+ },
1262
+ {
1263
+ "name": "encoder.layer.7.intermediate.dense.weight",
1264
+ "shape": [
1265
+ 1536,
1266
+ 384
1267
+ ],
1268
+ "dtype": "bfloat16",
1269
+ "format": "raw",
1270
+ "nbytes": 1179648,
1271
+ "byteOffset": 23660544
1272
+ },
1273
+ {
1274
+ "name": "encoder.layer.7.output.LayerNorm.bias",
1275
+ "shape": [
1276
+ 384
1277
+ ],
1278
+ "dtype": "bfloat16",
1279
+ "format": "raw",
1280
+ "nbytes": 768,
1281
+ "byteOffset": 24840192
1282
+ },
1283
+ {
1284
+ "name": "encoder.layer.7.output.LayerNorm.weight",
1285
+ "shape": [
1286
+ 384
1287
+ ],
1288
+ "dtype": "bfloat16",
1289
+ "format": "raw",
1290
+ "nbytes": 768,
1291
+ "byteOffset": 24840960
1292
+ },
1293
+ {
1294
+ "name": "encoder.layer.7.output.dense.bias",
1295
+ "shape": [
1296
+ 384
1297
+ ],
1298
+ "dtype": "bfloat16",
1299
+ "format": "raw",
1300
+ "nbytes": 768,
1301
+ "byteOffset": 24841728
1302
+ },
1303
+ {
1304
+ "name": "encoder.layer.7.output.dense.weight",
1305
+ "shape": [
1306
+ 384,
1307
+ 1536
1308
+ ],
1309
+ "dtype": "bfloat16",
1310
+ "format": "raw",
1311
+ "nbytes": 1179648,
1312
+ "byteOffset": 24842496
1313
+ },
1314
+ {
1315
+ "name": "encoder.layer.8.attention.output.LayerNorm.bias",
1316
+ "shape": [
1317
+ 384
1318
+ ],
1319
+ "dtype": "bfloat16",
1320
+ "format": "raw",
1321
+ "nbytes": 768,
1322
+ "byteOffset": 26022144
1323
+ },
1324
+ {
1325
+ "name": "encoder.layer.8.attention.output.LayerNorm.weight",
1326
+ "shape": [
1327
+ 384
1328
+ ],
1329
+ "dtype": "bfloat16",
1330
+ "format": "raw",
1331
+ "nbytes": 768,
1332
+ "byteOffset": 26022912
1333
+ },
1334
+ {
1335
+ "name": "encoder.layer.8.attention.output.dense.bias",
1336
+ "shape": [
1337
+ 384
1338
+ ],
1339
+ "dtype": "bfloat16",
1340
+ "format": "raw",
1341
+ "nbytes": 768,
1342
+ "byteOffset": 26023680
1343
+ },
1344
+ {
1345
+ "name": "encoder.layer.8.attention.output.dense.weight",
1346
+ "shape": [
1347
+ 384,
1348
+ 384
1349
+ ],
1350
+ "dtype": "bfloat16",
1351
+ "format": "raw",
1352
+ "nbytes": 294912,
1353
+ "byteOffset": 26024448
1354
+ },
1355
+ {
1356
+ "name": "encoder.layer.8.attention.self.qkv.bias",
1357
+ "shape": [
1358
+ 1152
1359
+ ],
1360
+ "dtype": "bfloat16",
1361
+ "format": "raw",
1362
+ "nbytes": 2304,
1363
+ "byteOffset": 26319360
1364
+ },
1365
+ {
1366
+ "name": "encoder.layer.8.attention.self.qkv.weight",
1367
+ "shape": [
1368
+ 1152,
1369
+ 384
1370
+ ],
1371
+ "dtype": "bfloat16",
1372
+ "format": "raw",
1373
+ "nbytes": 884736,
1374
+ "byteOffset": 26321664
1375
+ },
1376
+ {
1377
+ "name": "encoder.layer.8.intermediate.dense.bias",
1378
+ "shape": [
1379
+ 1536
1380
+ ],
1381
+ "dtype": "bfloat16",
1382
+ "format": "raw",
1383
+ "nbytes": 3072,
1384
+ "byteOffset": 27206400
1385
+ },
1386
+ {
1387
+ "name": "encoder.layer.8.intermediate.dense.weight",
1388
+ "shape": [
1389
+ 1536,
1390
+ 384
1391
+ ],
1392
+ "dtype": "bfloat16",
1393
+ "format": "raw",
1394
+ "nbytes": 1179648,
1395
+ "byteOffset": 27209472
1396
+ },
1397
+ {
1398
+ "name": "encoder.layer.8.output.LayerNorm.bias",
1399
+ "shape": [
1400
+ 384
1401
+ ],
1402
+ "dtype": "bfloat16",
1403
+ "format": "raw",
1404
+ "nbytes": 768,
1405
+ "byteOffset": 28389120
1406
+ },
1407
+ {
1408
+ "name": "encoder.layer.8.output.LayerNorm.weight",
1409
+ "shape": [
1410
+ 384
1411
+ ],
1412
+ "dtype": "bfloat16",
1413
+ "format": "raw",
1414
+ "nbytes": 768,
1415
+ "byteOffset": 28389888
1416
+ },
1417
+ {
1418
+ "name": "encoder.layer.8.output.dense.bias",
1419
+ "shape": [
1420
+ 384
1421
+ ],
1422
+ "dtype": "bfloat16",
1423
+ "format": "raw",
1424
+ "nbytes": 768,
1425
+ "byteOffset": 28390656
1426
+ },
1427
+ {
1428
+ "name": "encoder.layer.8.output.dense.weight",
1429
+ "shape": [
1430
+ 384,
1431
+ 1536
1432
+ ],
1433
+ "dtype": "bfloat16",
1434
+ "format": "raw",
1435
+ "nbytes": 1179648,
1436
+ "byteOffset": 28391424
1437
+ },
1438
+ {
1439
+ "name": "encoder.layer.9.attention.output.LayerNorm.bias",
1440
+ "shape": [
1441
+ 384
1442
+ ],
1443
+ "dtype": "bfloat16",
1444
+ "format": "raw",
1445
+ "nbytes": 768,
1446
+ "byteOffset": 29571072
1447
+ },
1448
+ {
1449
+ "name": "encoder.layer.9.attention.output.LayerNorm.weight",
1450
+ "shape": [
1451
+ 384
1452
+ ],
1453
+ "dtype": "bfloat16",
1454
+ "format": "raw",
1455
+ "nbytes": 768,
1456
+ "byteOffset": 29571840
1457
+ },
1458
+ {
1459
+ "name": "encoder.layer.9.attention.output.dense.bias",
1460
+ "shape": [
1461
+ 384
1462
+ ],
1463
+ "dtype": "bfloat16",
1464
+ "format": "raw",
1465
+ "nbytes": 768,
1466
+ "byteOffset": 29572608
1467
+ },
1468
+ {
1469
+ "name": "encoder.layer.9.attention.output.dense.weight",
1470
+ "shape": [
1471
+ 384,
1472
+ 384
1473
+ ],
1474
+ "dtype": "bfloat16",
1475
+ "format": "raw",
1476
+ "nbytes": 294912,
1477
+ "byteOffset": 29573376
1478
+ },
1479
+ {
1480
+ "name": "encoder.layer.9.attention.self.qkv.bias",
1481
+ "shape": [
1482
+ 1152
1483
+ ],
1484
+ "dtype": "bfloat16",
1485
+ "format": "raw",
1486
+ "nbytes": 2304,
1487
+ "byteOffset": 29868288
1488
+ },
1489
+ {
1490
+ "name": "encoder.layer.9.attention.self.qkv.weight",
1491
+ "shape": [
1492
+ 1152,
1493
+ 384
1494
+ ],
1495
+ "dtype": "bfloat16",
1496
+ "format": "raw",
1497
+ "nbytes": 884736,
1498
+ "byteOffset": 29870592
1499
+ },
1500
+ {
1501
+ "name": "encoder.layer.9.intermediate.dense.bias",
1502
+ "shape": [
1503
+ 1536
1504
+ ],
1505
+ "dtype": "bfloat16",
1506
+ "format": "raw",
1507
+ "nbytes": 3072,
1508
+ "byteOffset": 30755328
1509
+ },
1510
+ {
1511
+ "name": "encoder.layer.9.intermediate.dense.weight",
1512
+ "shape": [
1513
+ 1536,
1514
+ 384
1515
+ ],
1516
+ "dtype": "bfloat16",
1517
+ "format": "raw",
1518
+ "nbytes": 1179648,
1519
+ "byteOffset": 30758400
1520
+ },
1521
+ {
1522
+ "name": "encoder.layer.9.output.LayerNorm.bias",
1523
+ "shape": [
1524
+ 384
1525
+ ],
1526
+ "dtype": "bfloat16",
1527
+ "format": "raw",
1528
+ "nbytes": 768,
1529
+ "byteOffset": 31938048
1530
+ },
1531
+ {
1532
+ "name": "encoder.layer.9.output.LayerNorm.weight",
1533
+ "shape": [
1534
+ 384
1535
+ ],
1536
+ "dtype": "bfloat16",
1537
+ "format": "raw",
1538
+ "nbytes": 768,
1539
+ "byteOffset": 31938816
1540
+ },
1541
+ {
1542
+ "name": "encoder.layer.9.output.dense.bias",
1543
+ "shape": [
1544
+ 384
1545
+ ],
1546
+ "dtype": "bfloat16",
1547
+ "format": "raw",
1548
+ "nbytes": 768,
1549
+ "byteOffset": 31939584
1550
+ },
1551
+ {
1552
+ "name": "encoder.layer.9.output.dense.weight",
1553
+ "shape": [
1554
+ 384,
1555
+ 1536
1556
+ ],
1557
+ "dtype": "bfloat16",
1558
+ "format": "raw",
1559
+ "nbytes": 1179648,
1560
+ "byteOffset": 31940352
1561
+ }
1562
+ ],
1563
+ "md5sum": "4f150cdc4aff3c33d8a38c07d140b60b"
1564
+ }
1565
+ ]
1566
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,1566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 149,
4
+ "ParamBytes": 132848640.0,
5
+ "BitsPerParam": 32.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 33304320,
12
+ "records": [
13
+ {
14
+ "name": "embeddings.LayerNorm.bias",
15
+ "shape": [
16
+ 384
17
+ ],
18
+ "dtype": "float32",
19
+ "format": "f32-to-bf16",
20
+ "nbytes": 768,
21
+ "byteOffset": 0
22
+ },
23
+ {
24
+ "name": "embeddings.LayerNorm.weight",
25
+ "shape": [
26
+ 384
27
+ ],
28
+ "dtype": "float32",
29
+ "format": "f32-to-bf16",
30
+ "nbytes": 768,
31
+ "byteOffset": 768
32
+ },
33
+ {
34
+ "name": "embeddings.position_embeddings.weight",
35
+ "shape": [
36
+ 512,
37
+ 384
38
+ ],
39
+ "dtype": "float32",
40
+ "format": "f32-to-bf16",
41
+ "nbytes": 393216,
42
+ "byteOffset": 1536
43
+ },
44
+ {
45
+ "name": "embeddings.token_type_embeddings.weight",
46
+ "shape": [
47
+ 2,
48
+ 384
49
+ ],
50
+ "dtype": "float32",
51
+ "format": "f32-to-bf16",
52
+ "nbytes": 1536,
53
+ "byteOffset": 394752
54
+ },
55
+ {
56
+ "name": "embeddings.word_embeddings.weight",
57
+ "shape": [
58
+ 30522,
59
+ 384
60
+ ],
61
+ "dtype": "float32",
62
+ "format": "f32-to-bf16",
63
+ "nbytes": 23440896,
64
+ "byteOffset": 396288
65
+ },
66
+ {
67
+ "name": "encoder.layer.0.attention.output.LayerNorm.bias",
68
+ "shape": [
69
+ 384
70
+ ],
71
+ "dtype": "float32",
72
+ "format": "f32-to-bf16",
73
+ "nbytes": 768,
74
+ "byteOffset": 23837184
75
+ },
76
+ {
77
+ "name": "encoder.layer.0.attention.output.LayerNorm.weight",
78
+ "shape": [
79
+ 384
80
+ ],
81
+ "dtype": "float32",
82
+ "format": "f32-to-bf16",
83
+ "nbytes": 768,
84
+ "byteOffset": 23837952
85
+ },
86
+ {
87
+ "name": "encoder.layer.0.attention.output.dense.bias",
88
+ "shape": [
89
+ 384
90
+ ],
91
+ "dtype": "float32",
92
+ "format": "f32-to-bf16",
93
+ "nbytes": 768,
94
+ "byteOffset": 23838720
95
+ },
96
+ {
97
+ "name": "encoder.layer.0.attention.output.dense.weight",
98
+ "shape": [
99
+ 384,
100
+ 384
101
+ ],
102
+ "dtype": "float32",
103
+ "format": "f32-to-bf16",
104
+ "nbytes": 294912,
105
+ "byteOffset": 23839488
106
+ },
107
+ {
108
+ "name": "encoder.layer.0.attention.self.qkv.bias",
109
+ "shape": [
110
+ 1152
111
+ ],
112
+ "dtype": "float32",
113
+ "format": "f32-to-bf16",
114
+ "nbytes": 2304,
115
+ "byteOffset": 24134400
116
+ },
117
+ {
118
+ "name": "encoder.layer.0.attention.self.qkv.weight",
119
+ "shape": [
120
+ 1152,
121
+ 384
122
+ ],
123
+ "dtype": "float32",
124
+ "format": "f32-to-bf16",
125
+ "nbytes": 884736,
126
+ "byteOffset": 24136704
127
+ },
128
+ {
129
+ "name": "encoder.layer.0.intermediate.dense.bias",
130
+ "shape": [
131
+ 1536
132
+ ],
133
+ "dtype": "float32",
134
+ "format": "f32-to-bf16",
135
+ "nbytes": 3072,
136
+ "byteOffset": 25021440
137
+ },
138
+ {
139
+ "name": "encoder.layer.0.intermediate.dense.weight",
140
+ "shape": [
141
+ 1536,
142
+ 384
143
+ ],
144
+ "dtype": "float32",
145
+ "format": "f32-to-bf16",
146
+ "nbytes": 1179648,
147
+ "byteOffset": 25024512
148
+ },
149
+ {
150
+ "name": "encoder.layer.0.output.LayerNorm.bias",
151
+ "shape": [
152
+ 384
153
+ ],
154
+ "dtype": "float32",
155
+ "format": "f32-to-bf16",
156
+ "nbytes": 768,
157
+ "byteOffset": 26204160
158
+ },
159
+ {
160
+ "name": "encoder.layer.0.output.LayerNorm.weight",
161
+ "shape": [
162
+ 384
163
+ ],
164
+ "dtype": "float32",
165
+ "format": "f32-to-bf16",
166
+ "nbytes": 768,
167
+ "byteOffset": 26204928
168
+ },
169
+ {
170
+ "name": "encoder.layer.0.output.dense.bias",
171
+ "shape": [
172
+ 384
173
+ ],
174
+ "dtype": "float32",
175
+ "format": "f32-to-bf16",
176
+ "nbytes": 768,
177
+ "byteOffset": 26205696
178
+ },
179
+ {
180
+ "name": "encoder.layer.0.output.dense.weight",
181
+ "shape": [
182
+ 384,
183
+ 1536
184
+ ],
185
+ "dtype": "float32",
186
+ "format": "f32-to-bf16",
187
+ "nbytes": 1179648,
188
+ "byteOffset": 26206464
189
+ },
190
+ {
191
+ "name": "encoder.layer.1.attention.output.LayerNorm.bias",
192
+ "shape": [
193
+ 384
194
+ ],
195
+ "dtype": "float32",
196
+ "format": "f32-to-bf16",
197
+ "nbytes": 768,
198
+ "byteOffset": 27386112
199
+ },
200
+ {
201
+ "name": "encoder.layer.1.attention.output.LayerNorm.weight",
202
+ "shape": [
203
+ 384
204
+ ],
205
+ "dtype": "float32",
206
+ "format": "f32-to-bf16",
207
+ "nbytes": 768,
208
+ "byteOffset": 27386880
209
+ },
210
+ {
211
+ "name": "encoder.layer.1.attention.output.dense.bias",
212
+ "shape": [
213
+ 384
214
+ ],
215
+ "dtype": "float32",
216
+ "format": "f32-to-bf16",
217
+ "nbytes": 768,
218
+ "byteOffset": 27387648
219
+ },
220
+ {
221
+ "name": "encoder.layer.1.attention.output.dense.weight",
222
+ "shape": [
223
+ 384,
224
+ 384
225
+ ],
226
+ "dtype": "float32",
227
+ "format": "f32-to-bf16",
228
+ "nbytes": 294912,
229
+ "byteOffset": 27388416
230
+ },
231
+ {
232
+ "name": "encoder.layer.1.attention.self.qkv.bias",
233
+ "shape": [
234
+ 1152
235
+ ],
236
+ "dtype": "float32",
237
+ "format": "f32-to-bf16",
238
+ "nbytes": 2304,
239
+ "byteOffset": 27683328
240
+ },
241
+ {
242
+ "name": "encoder.layer.1.attention.self.qkv.weight",
243
+ "shape": [
244
+ 1152,
245
+ 384
246
+ ],
247
+ "dtype": "float32",
248
+ "format": "f32-to-bf16",
249
+ "nbytes": 884736,
250
+ "byteOffset": 27685632
251
+ },
252
+ {
253
+ "name": "encoder.layer.1.intermediate.dense.bias",
254
+ "shape": [
255
+ 1536
256
+ ],
257
+ "dtype": "float32",
258
+ "format": "f32-to-bf16",
259
+ "nbytes": 3072,
260
+ "byteOffset": 28570368
261
+ },
262
+ {
263
+ "name": "encoder.layer.1.intermediate.dense.weight",
264
+ "shape": [
265
+ 1536,
266
+ 384
267
+ ],
268
+ "dtype": "float32",
269
+ "format": "f32-to-bf16",
270
+ "nbytes": 1179648,
271
+ "byteOffset": 28573440
272
+ },
273
+ {
274
+ "name": "encoder.layer.1.output.LayerNorm.bias",
275
+ "shape": [
276
+ 384
277
+ ],
278
+ "dtype": "float32",
279
+ "format": "f32-to-bf16",
280
+ "nbytes": 768,
281
+ "byteOffset": 29753088
282
+ },
283
+ {
284
+ "name": "encoder.layer.1.output.LayerNorm.weight",
285
+ "shape": [
286
+ 384
287
+ ],
288
+ "dtype": "float32",
289
+ "format": "f32-to-bf16",
290
+ "nbytes": 768,
291
+ "byteOffset": 29753856
292
+ },
293
+ {
294
+ "name": "encoder.layer.1.output.dense.bias",
295
+ "shape": [
296
+ 384
297
+ ],
298
+ "dtype": "float32",
299
+ "format": "f32-to-bf16",
300
+ "nbytes": 768,
301
+ "byteOffset": 29754624
302
+ },
303
+ {
304
+ "name": "encoder.layer.1.output.dense.weight",
305
+ "shape": [
306
+ 384,
307
+ 1536
308
+ ],
309
+ "dtype": "float32",
310
+ "format": "f32-to-bf16",
311
+ "nbytes": 1179648,
312
+ "byteOffset": 29755392
313
+ },
314
+ {
315
+ "name": "encoder.layer.10.attention.output.LayerNorm.bias",
316
+ "shape": [
317
+ 384
318
+ ],
319
+ "dtype": "float32",
320
+ "format": "f32-to-bf16",
321
+ "nbytes": 768,
322
+ "byteOffset": 30935040
323
+ },
324
+ {
325
+ "name": "encoder.layer.10.attention.output.LayerNorm.weight",
326
+ "shape": [
327
+ 384
328
+ ],
329
+ "dtype": "float32",
330
+ "format": "f32-to-bf16",
331
+ "nbytes": 768,
332
+ "byteOffset": 30935808
333
+ },
334
+ {
335
+ "name": "encoder.layer.10.attention.output.dense.bias",
336
+ "shape": [
337
+ 384
338
+ ],
339
+ "dtype": "float32",
340
+ "format": "f32-to-bf16",
341
+ "nbytes": 768,
342
+ "byteOffset": 30936576
343
+ },
344
+ {
345
+ "name": "encoder.layer.10.attention.output.dense.weight",
346
+ "shape": [
347
+ 384,
348
+ 384
349
+ ],
350
+ "dtype": "float32",
351
+ "format": "f32-to-bf16",
352
+ "nbytes": 294912,
353
+ "byteOffset": 30937344
354
+ },
355
+ {
356
+ "name": "encoder.layer.10.attention.self.qkv.bias",
357
+ "shape": [
358
+ 1152
359
+ ],
360
+ "dtype": "float32",
361
+ "format": "f32-to-bf16",
362
+ "nbytes": 2304,
363
+ "byteOffset": 31232256
364
+ },
365
+ {
366
+ "name": "encoder.layer.10.attention.self.qkv.weight",
367
+ "shape": [
368
+ 1152,
369
+ 384
370
+ ],
371
+ "dtype": "float32",
372
+ "format": "f32-to-bf16",
373
+ "nbytes": 884736,
374
+ "byteOffset": 31234560
375
+ },
376
+ {
377
+ "name": "encoder.layer.10.intermediate.dense.bias",
378
+ "shape": [
379
+ 1536
380
+ ],
381
+ "dtype": "float32",
382
+ "format": "f32-to-bf16",
383
+ "nbytes": 3072,
384
+ "byteOffset": 32119296
385
+ },
386
+ {
387
+ "name": "encoder.layer.10.intermediate.dense.weight",
388
+ "shape": [
389
+ 1536,
390
+ 384
391
+ ],
392
+ "dtype": "float32",
393
+ "format": "f32-to-bf16",
394
+ "nbytes": 1179648,
395
+ "byteOffset": 32122368
396
+ },
397
+ {
398
+ "name": "encoder.layer.10.output.LayerNorm.bias",
399
+ "shape": [
400
+ 384
401
+ ],
402
+ "dtype": "float32",
403
+ "format": "f32-to-bf16",
404
+ "nbytes": 768,
405
+ "byteOffset": 33302016
406
+ },
407
+ {
408
+ "name": "encoder.layer.10.output.LayerNorm.weight",
409
+ "shape": [
410
+ 384
411
+ ],
412
+ "dtype": "float32",
413
+ "format": "f32-to-bf16",
414
+ "nbytes": 768,
415
+ "byteOffset": 33302784
416
+ },
417
+ {
418
+ "name": "encoder.layer.10.output.dense.bias",
419
+ "shape": [
420
+ 384
421
+ ],
422
+ "dtype": "float32",
423
+ "format": "f32-to-bf16",
424
+ "nbytes": 768,
425
+ "byteOffset": 33303552
426
+ }
427
+ ],
428
+ "md5sum": "12f41dfd857e82988a77aaa528e2af9f"
429
+ },
430
+ {
431
+ "dataPath": "params_shard_1.bin",
432
+ "format": "raw-shard",
433
+ "nbytes": 33120000,
434
+ "records": [
435
+ {
436
+ "name": "encoder.layer.10.output.dense.weight",
437
+ "shape": [
438
+ 384,
439
+ 1536
440
+ ],
441
+ "dtype": "float32",
442
+ "format": "f32-to-bf16",
443
+ "nbytes": 1179648,
444
+ "byteOffset": 0
445
+ },
446
+ {
447
+ "name": "encoder.layer.11.attention.output.LayerNorm.bias",
448
+ "shape": [
449
+ 384
450
+ ],
451
+ "dtype": "float32",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 768,
454
+ "byteOffset": 1179648
455
+ },
456
+ {
457
+ "name": "encoder.layer.11.attention.output.LayerNorm.weight",
458
+ "shape": [
459
+ 384
460
+ ],
461
+ "dtype": "float32",
462
+ "format": "f32-to-bf16",
463
+ "nbytes": 768,
464
+ "byteOffset": 1180416
465
+ },
466
+ {
467
+ "name": "encoder.layer.11.attention.output.dense.bias",
468
+ "shape": [
469
+ 384
470
+ ],
471
+ "dtype": "float32",
472
+ "format": "f32-to-bf16",
473
+ "nbytes": 768,
474
+ "byteOffset": 1181184
475
+ },
476
+ {
477
+ "name": "encoder.layer.11.attention.output.dense.weight",
478
+ "shape": [
479
+ 384,
480
+ 384
481
+ ],
482
+ "dtype": "float32",
483
+ "format": "f32-to-bf16",
484
+ "nbytes": 294912,
485
+ "byteOffset": 1181952
486
+ },
487
+ {
488
+ "name": "encoder.layer.11.attention.self.qkv.bias",
489
+ "shape": [
490
+ 1152
491
+ ],
492
+ "dtype": "float32",
493
+ "format": "f32-to-bf16",
494
+ "nbytes": 2304,
495
+ "byteOffset": 1476864
496
+ },
497
+ {
498
+ "name": "encoder.layer.11.attention.self.qkv.weight",
499
+ "shape": [
500
+ 1152,
501
+ 384
502
+ ],
503
+ "dtype": "float32",
504
+ "format": "f32-to-bf16",
505
+ "nbytes": 884736,
506
+ "byteOffset": 1479168
507
+ },
508
+ {
509
+ "name": "encoder.layer.11.intermediate.dense.bias",
510
+ "shape": [
511
+ 1536
512
+ ],
513
+ "dtype": "float32",
514
+ "format": "f32-to-bf16",
515
+ "nbytes": 3072,
516
+ "byteOffset": 2363904
517
+ },
518
+ {
519
+ "name": "encoder.layer.11.intermediate.dense.weight",
520
+ "shape": [
521
+ 1536,
522
+ 384
523
+ ],
524
+ "dtype": "float32",
525
+ "format": "f32-to-bf16",
526
+ "nbytes": 1179648,
527
+ "byteOffset": 2366976
528
+ },
529
+ {
530
+ "name": "encoder.layer.11.output.LayerNorm.bias",
531
+ "shape": [
532
+ 384
533
+ ],
534
+ "dtype": "float32",
535
+ "format": "f32-to-bf16",
536
+ "nbytes": 768,
537
+ "byteOffset": 3546624
538
+ },
539
+ {
540
+ "name": "encoder.layer.11.output.LayerNorm.weight",
541
+ "shape": [
542
+ 384
543
+ ],
544
+ "dtype": "float32",
545
+ "format": "f32-to-bf16",
546
+ "nbytes": 768,
547
+ "byteOffset": 3547392
548
+ },
549
+ {
550
+ "name": "encoder.layer.11.output.dense.bias",
551
+ "shape": [
552
+ 384
553
+ ],
554
+ "dtype": "float32",
555
+ "format": "f32-to-bf16",
556
+ "nbytes": 768,
557
+ "byteOffset": 3548160
558
+ },
559
+ {
560
+ "name": "encoder.layer.11.output.dense.weight",
561
+ "shape": [
562
+ 384,
563
+ 1536
564
+ ],
565
+ "dtype": "float32",
566
+ "format": "f32-to-bf16",
567
+ "nbytes": 1179648,
568
+ "byteOffset": 3548928
569
+ },
570
+ {
571
+ "name": "encoder.layer.2.attention.output.LayerNorm.bias",
572
+ "shape": [
573
+ 384
574
+ ],
575
+ "dtype": "float32",
576
+ "format": "f32-to-bf16",
577
+ "nbytes": 768,
578
+ "byteOffset": 4728576
579
+ },
580
+ {
581
+ "name": "encoder.layer.2.attention.output.LayerNorm.weight",
582
+ "shape": [
583
+ 384
584
+ ],
585
+ "dtype": "float32",
586
+ "format": "f32-to-bf16",
587
+ "nbytes": 768,
588
+ "byteOffset": 4729344
589
+ },
590
+ {
591
+ "name": "encoder.layer.2.attention.output.dense.bias",
592
+ "shape": [
593
+ 384
594
+ ],
595
+ "dtype": "float32",
596
+ "format": "f32-to-bf16",
597
+ "nbytes": 768,
598
+ "byteOffset": 4730112
599
+ },
600
+ {
601
+ "name": "encoder.layer.2.attention.output.dense.weight",
602
+ "shape": [
603
+ 384,
604
+ 384
605
+ ],
606
+ "dtype": "float32",
607
+ "format": "f32-to-bf16",
608
+ "nbytes": 294912,
609
+ "byteOffset": 4730880
610
+ },
611
+ {
612
+ "name": "encoder.layer.2.attention.self.qkv.bias",
613
+ "shape": [
614
+ 1152
615
+ ],
616
+ "dtype": "float32",
617
+ "format": "f32-to-bf16",
618
+ "nbytes": 2304,
619
+ "byteOffset": 5025792
620
+ },
621
+ {
622
+ "name": "encoder.layer.2.attention.self.qkv.weight",
623
+ "shape": [
624
+ 1152,
625
+ 384
626
+ ],
627
+ "dtype": "float32",
628
+ "format": "f32-to-bf16",
629
+ "nbytes": 884736,
630
+ "byteOffset": 5028096
631
+ },
632
+ {
633
+ "name": "encoder.layer.2.intermediate.dense.bias",
634
+ "shape": [
635
+ 1536
636
+ ],
637
+ "dtype": "float32",
638
+ "format": "f32-to-bf16",
639
+ "nbytes": 3072,
640
+ "byteOffset": 5912832
641
+ },
642
+ {
643
+ "name": "encoder.layer.2.intermediate.dense.weight",
644
+ "shape": [
645
+ 1536,
646
+ 384
647
+ ],
648
+ "dtype": "float32",
649
+ "format": "f32-to-bf16",
650
+ "nbytes": 1179648,
651
+ "byteOffset": 5915904
652
+ },
653
+ {
654
+ "name": "encoder.layer.2.output.LayerNorm.bias",
655
+ "shape": [
656
+ 384
657
+ ],
658
+ "dtype": "float32",
659
+ "format": "f32-to-bf16",
660
+ "nbytes": 768,
661
+ "byteOffset": 7095552
662
+ },
663
+ {
664
+ "name": "encoder.layer.2.output.LayerNorm.weight",
665
+ "shape": [
666
+ 384
667
+ ],
668
+ "dtype": "float32",
669
+ "format": "f32-to-bf16",
670
+ "nbytes": 768,
671
+ "byteOffset": 7096320
672
+ },
673
+ {
674
+ "name": "encoder.layer.2.output.dense.bias",
675
+ "shape": [
676
+ 384
677
+ ],
678
+ "dtype": "float32",
679
+ "format": "f32-to-bf16",
680
+ "nbytes": 768,
681
+ "byteOffset": 7097088
682
+ },
683
+ {
684
+ "name": "encoder.layer.2.output.dense.weight",
685
+ "shape": [
686
+ 384,
687
+ 1536
688
+ ],
689
+ "dtype": "float32",
690
+ "format": "f32-to-bf16",
691
+ "nbytes": 1179648,
692
+ "byteOffset": 7097856
693
+ },
694
+ {
695
+ "name": "encoder.layer.3.attention.output.LayerNorm.bias",
696
+ "shape": [
697
+ 384
698
+ ],
699
+ "dtype": "float32",
700
+ "format": "f32-to-bf16",
701
+ "nbytes": 768,
702
+ "byteOffset": 8277504
703
+ },
704
+ {
705
+ "name": "encoder.layer.3.attention.output.LayerNorm.weight",
706
+ "shape": [
707
+ 384
708
+ ],
709
+ "dtype": "float32",
710
+ "format": "f32-to-bf16",
711
+ "nbytes": 768,
712
+ "byteOffset": 8278272
713
+ },
714
+ {
715
+ "name": "encoder.layer.3.attention.output.dense.bias",
716
+ "shape": [
717
+ 384
718
+ ],
719
+ "dtype": "float32",
720
+ "format": "f32-to-bf16",
721
+ "nbytes": 768,
722
+ "byteOffset": 8279040
723
+ },
724
+ {
725
+ "name": "encoder.layer.3.attention.output.dense.weight",
726
+ "shape": [
727
+ 384,
728
+ 384
729
+ ],
730
+ "dtype": "float32",
731
+ "format": "f32-to-bf16",
732
+ "nbytes": 294912,
733
+ "byteOffset": 8279808
734
+ },
735
+ {
736
+ "name": "encoder.layer.3.attention.self.qkv.bias",
737
+ "shape": [
738
+ 1152
739
+ ],
740
+ "dtype": "float32",
741
+ "format": "f32-to-bf16",
742
+ "nbytes": 2304,
743
+ "byteOffset": 8574720
744
+ },
745
+ {
746
+ "name": "encoder.layer.3.attention.self.qkv.weight",
747
+ "shape": [
748
+ 1152,
749
+ 384
750
+ ],
751
+ "dtype": "float32",
752
+ "format": "f32-to-bf16",
753
+ "nbytes": 884736,
754
+ "byteOffset": 8577024
755
+ },
756
+ {
757
+ "name": "encoder.layer.3.intermediate.dense.bias",
758
+ "shape": [
759
+ 1536
760
+ ],
761
+ "dtype": "float32",
762
+ "format": "f32-to-bf16",
763
+ "nbytes": 3072,
764
+ "byteOffset": 9461760
765
+ },
766
+ {
767
+ "name": "encoder.layer.3.intermediate.dense.weight",
768
+ "shape": [
769
+ 1536,
770
+ 384
771
+ ],
772
+ "dtype": "float32",
773
+ "format": "f32-to-bf16",
774
+ "nbytes": 1179648,
775
+ "byteOffset": 9464832
776
+ },
777
+ {
778
+ "name": "encoder.layer.3.output.LayerNorm.bias",
779
+ "shape": [
780
+ 384
781
+ ],
782
+ "dtype": "float32",
783
+ "format": "f32-to-bf16",
784
+ "nbytes": 768,
785
+ "byteOffset": 10644480
786
+ },
787
+ {
788
+ "name": "encoder.layer.3.output.LayerNorm.weight",
789
+ "shape": [
790
+ 384
791
+ ],
792
+ "dtype": "float32",
793
+ "format": "f32-to-bf16",
794
+ "nbytes": 768,
795
+ "byteOffset": 10645248
796
+ },
797
+ {
798
+ "name": "encoder.layer.3.output.dense.bias",
799
+ "shape": [
800
+ 384
801
+ ],
802
+ "dtype": "float32",
803
+ "format": "f32-to-bf16",
804
+ "nbytes": 768,
805
+ "byteOffset": 10646016
806
+ },
807
+ {
808
+ "name": "encoder.layer.3.output.dense.weight",
809
+ "shape": [
810
+ 384,
811
+ 1536
812
+ ],
813
+ "dtype": "float32",
814
+ "format": "f32-to-bf16",
815
+ "nbytes": 1179648,
816
+ "byteOffset": 10646784
817
+ },
818
+ {
819
+ "name": "encoder.layer.4.attention.output.LayerNorm.bias",
820
+ "shape": [
821
+ 384
822
+ ],
823
+ "dtype": "float32",
824
+ "format": "f32-to-bf16",
825
+ "nbytes": 768,
826
+ "byteOffset": 11826432
827
+ },
828
+ {
829
+ "name": "encoder.layer.4.attention.output.LayerNorm.weight",
830
+ "shape": [
831
+ 384
832
+ ],
833
+ "dtype": "float32",
834
+ "format": "f32-to-bf16",
835
+ "nbytes": 768,
836
+ "byteOffset": 11827200
837
+ },
838
+ {
839
+ "name": "encoder.layer.4.attention.output.dense.bias",
840
+ "shape": [
841
+ 384
842
+ ],
843
+ "dtype": "float32",
844
+ "format": "f32-to-bf16",
845
+ "nbytes": 768,
846
+ "byteOffset": 11827968
847
+ },
848
+ {
849
+ "name": "encoder.layer.4.attention.output.dense.weight",
850
+ "shape": [
851
+ 384,
852
+ 384
853
+ ],
854
+ "dtype": "float32",
855
+ "format": "f32-to-bf16",
856
+ "nbytes": 294912,
857
+ "byteOffset": 11828736
858
+ },
859
+ {
860
+ "name": "encoder.layer.4.attention.self.qkv.bias",
861
+ "shape": [
862
+ 1152
863
+ ],
864
+ "dtype": "float32",
865
+ "format": "f32-to-bf16",
866
+ "nbytes": 2304,
867
+ "byteOffset": 12123648
868
+ },
869
+ {
870
+ "name": "encoder.layer.4.attention.self.qkv.weight",
871
+ "shape": [
872
+ 1152,
873
+ 384
874
+ ],
875
+ "dtype": "float32",
876
+ "format": "f32-to-bf16",
877
+ "nbytes": 884736,
878
+ "byteOffset": 12125952
879
+ },
880
+ {
881
+ "name": "encoder.layer.4.intermediate.dense.bias",
882
+ "shape": [
883
+ 1536
884
+ ],
885
+ "dtype": "float32",
886
+ "format": "f32-to-bf16",
887
+ "nbytes": 3072,
888
+ "byteOffset": 13010688
889
+ },
890
+ {
891
+ "name": "encoder.layer.4.intermediate.dense.weight",
892
+ "shape": [
893
+ 1536,
894
+ 384
895
+ ],
896
+ "dtype": "float32",
897
+ "format": "f32-to-bf16",
898
+ "nbytes": 1179648,
899
+ "byteOffset": 13013760
900
+ },
901
+ {
902
+ "name": "encoder.layer.4.output.LayerNorm.bias",
903
+ "shape": [
904
+ 384
905
+ ],
906
+ "dtype": "float32",
907
+ "format": "f32-to-bf16",
908
+ "nbytes": 768,
909
+ "byteOffset": 14193408
910
+ },
911
+ {
912
+ "name": "encoder.layer.4.output.LayerNorm.weight",
913
+ "shape": [
914
+ 384
915
+ ],
916
+ "dtype": "float32",
917
+ "format": "f32-to-bf16",
918
+ "nbytes": 768,
919
+ "byteOffset": 14194176
920
+ },
921
+ {
922
+ "name": "encoder.layer.4.output.dense.bias",
923
+ "shape": [
924
+ 384
925
+ ],
926
+ "dtype": "float32",
927
+ "format": "f32-to-bf16",
928
+ "nbytes": 768,
929
+ "byteOffset": 14194944
930
+ },
931
+ {
932
+ "name": "encoder.layer.4.output.dense.weight",
933
+ "shape": [
934
+ 384,
935
+ 1536
936
+ ],
937
+ "dtype": "float32",
938
+ "format": "f32-to-bf16",
939
+ "nbytes": 1179648,
940
+ "byteOffset": 14195712
941
+ },
942
+ {
943
+ "name": "encoder.layer.5.attention.output.LayerNorm.bias",
944
+ "shape": [
945
+ 384
946
+ ],
947
+ "dtype": "float32",
948
+ "format": "f32-to-bf16",
949
+ "nbytes": 768,
950
+ "byteOffset": 15375360
951
+ },
952
+ {
953
+ "name": "encoder.layer.5.attention.output.LayerNorm.weight",
954
+ "shape": [
955
+ 384
956
+ ],
957
+ "dtype": "float32",
958
+ "format": "f32-to-bf16",
959
+ "nbytes": 768,
960
+ "byteOffset": 15376128
961
+ },
962
+ {
963
+ "name": "encoder.layer.5.attention.output.dense.bias",
964
+ "shape": [
965
+ 384
966
+ ],
967
+ "dtype": "float32",
968
+ "format": "f32-to-bf16",
969
+ "nbytes": 768,
970
+ "byteOffset": 15376896
971
+ },
972
+ {
973
+ "name": "encoder.layer.5.attention.output.dense.weight",
974
+ "shape": [
975
+ 384,
976
+ 384
977
+ ],
978
+ "dtype": "float32",
979
+ "format": "f32-to-bf16",
980
+ "nbytes": 294912,
981
+ "byteOffset": 15377664
982
+ },
983
+ {
984
+ "name": "encoder.layer.5.attention.self.qkv.bias",
985
+ "shape": [
986
+ 1152
987
+ ],
988
+ "dtype": "float32",
989
+ "format": "f32-to-bf16",
990
+ "nbytes": 2304,
991
+ "byteOffset": 15672576
992
+ },
993
+ {
994
+ "name": "encoder.layer.5.attention.self.qkv.weight",
995
+ "shape": [
996
+ 1152,
997
+ 384
998
+ ],
999
+ "dtype": "float32",
1000
+ "format": "f32-to-bf16",
1001
+ "nbytes": 884736,
1002
+ "byteOffset": 15674880
1003
+ },
1004
+ {
1005
+ "name": "encoder.layer.5.intermediate.dense.bias",
1006
+ "shape": [
1007
+ 1536
1008
+ ],
1009
+ "dtype": "float32",
1010
+ "format": "f32-to-bf16",
1011
+ "nbytes": 3072,
1012
+ "byteOffset": 16559616
1013
+ },
1014
+ {
1015
+ "name": "encoder.layer.5.intermediate.dense.weight",
1016
+ "shape": [
1017
+ 1536,
1018
+ 384
1019
+ ],
1020
+ "dtype": "float32",
1021
+ "format": "f32-to-bf16",
1022
+ "nbytes": 1179648,
1023
+ "byteOffset": 16562688
1024
+ },
1025
+ {
1026
+ "name": "encoder.layer.5.output.LayerNorm.bias",
1027
+ "shape": [
1028
+ 384
1029
+ ],
1030
+ "dtype": "float32",
1031
+ "format": "f32-to-bf16",
1032
+ "nbytes": 768,
1033
+ "byteOffset": 17742336
1034
+ },
1035
+ {
1036
+ "name": "encoder.layer.5.output.LayerNorm.weight",
1037
+ "shape": [
1038
+ 384
1039
+ ],
1040
+ "dtype": "float32",
1041
+ "format": "f32-to-bf16",
1042
+ "nbytes": 768,
1043
+ "byteOffset": 17743104
1044
+ },
1045
+ {
1046
+ "name": "encoder.layer.5.output.dense.bias",
1047
+ "shape": [
1048
+ 384
1049
+ ],
1050
+ "dtype": "float32",
1051
+ "format": "f32-to-bf16",
1052
+ "nbytes": 768,
1053
+ "byteOffset": 17743872
1054
+ },
1055
+ {
1056
+ "name": "encoder.layer.5.output.dense.weight",
1057
+ "shape": [
1058
+ 384,
1059
+ 1536
1060
+ ],
1061
+ "dtype": "float32",
1062
+ "format": "f32-to-bf16",
1063
+ "nbytes": 1179648,
1064
+ "byteOffset": 17744640
1065
+ },
1066
+ {
1067
+ "name": "encoder.layer.6.attention.output.LayerNorm.bias",
1068
+ "shape": [
1069
+ 384
1070
+ ],
1071
+ "dtype": "float32",
1072
+ "format": "f32-to-bf16",
1073
+ "nbytes": 768,
1074
+ "byteOffset": 18924288
1075
+ },
1076
+ {
1077
+ "name": "encoder.layer.6.attention.output.LayerNorm.weight",
1078
+ "shape": [
1079
+ 384
1080
+ ],
1081
+ "dtype": "float32",
1082
+ "format": "f32-to-bf16",
1083
+ "nbytes": 768,
1084
+ "byteOffset": 18925056
1085
+ },
1086
+ {
1087
+ "name": "encoder.layer.6.attention.output.dense.bias",
1088
+ "shape": [
1089
+ 384
1090
+ ],
1091
+ "dtype": "float32",
1092
+ "format": "f32-to-bf16",
1093
+ "nbytes": 768,
1094
+ "byteOffset": 18925824
1095
+ },
1096
+ {
1097
+ "name": "encoder.layer.6.attention.output.dense.weight",
1098
+ "shape": [
1099
+ 384,
1100
+ 384
1101
+ ],
1102
+ "dtype": "float32",
1103
+ "format": "f32-to-bf16",
1104
+ "nbytes": 294912,
1105
+ "byteOffset": 18926592
1106
+ },
1107
+ {
1108
+ "name": "encoder.layer.6.attention.self.qkv.bias",
1109
+ "shape": [
1110
+ 1152
1111
+ ],
1112
+ "dtype": "float32",
1113
+ "format": "f32-to-bf16",
1114
+ "nbytes": 2304,
1115
+ "byteOffset": 19221504
1116
+ },
1117
+ {
1118
+ "name": "encoder.layer.6.attention.self.qkv.weight",
1119
+ "shape": [
1120
+ 1152,
1121
+ 384
1122
+ ],
1123
+ "dtype": "float32",
1124
+ "format": "f32-to-bf16",
1125
+ "nbytes": 884736,
1126
+ "byteOffset": 19223808
1127
+ },
1128
+ {
1129
+ "name": "encoder.layer.6.intermediate.dense.bias",
1130
+ "shape": [
1131
+ 1536
1132
+ ],
1133
+ "dtype": "float32",
1134
+ "format": "f32-to-bf16",
1135
+ "nbytes": 3072,
1136
+ "byteOffset": 20108544
1137
+ },
1138
+ {
1139
+ "name": "encoder.layer.6.intermediate.dense.weight",
1140
+ "shape": [
1141
+ 1536,
1142
+ 384
1143
+ ],
1144
+ "dtype": "float32",
1145
+ "format": "f32-to-bf16",
1146
+ "nbytes": 1179648,
1147
+ "byteOffset": 20111616
1148
+ },
1149
+ {
1150
+ "name": "encoder.layer.6.output.LayerNorm.bias",
1151
+ "shape": [
1152
+ 384
1153
+ ],
1154
+ "dtype": "float32",
1155
+ "format": "f32-to-bf16",
1156
+ "nbytes": 768,
1157
+ "byteOffset": 21291264
1158
+ },
1159
+ {
1160
+ "name": "encoder.layer.6.output.LayerNorm.weight",
1161
+ "shape": [
1162
+ 384
1163
+ ],
1164
+ "dtype": "float32",
1165
+ "format": "f32-to-bf16",
1166
+ "nbytes": 768,
1167
+ "byteOffset": 21292032
1168
+ },
1169
+ {
1170
+ "name": "encoder.layer.6.output.dense.bias",
1171
+ "shape": [
1172
+ 384
1173
+ ],
1174
+ "dtype": "float32",
1175
+ "format": "f32-to-bf16",
1176
+ "nbytes": 768,
1177
+ "byteOffset": 21292800
1178
+ },
1179
+ {
1180
+ "name": "encoder.layer.6.output.dense.weight",
1181
+ "shape": [
1182
+ 384,
1183
+ 1536
1184
+ ],
1185
+ "dtype": "float32",
1186
+ "format": "f32-to-bf16",
1187
+ "nbytes": 1179648,
1188
+ "byteOffset": 21293568
1189
+ },
1190
+ {
1191
+ "name": "encoder.layer.7.attention.output.LayerNorm.bias",
1192
+ "shape": [
1193
+ 384
1194
+ ],
1195
+ "dtype": "float32",
1196
+ "format": "f32-to-bf16",
1197
+ "nbytes": 768,
1198
+ "byteOffset": 22473216
1199
+ },
1200
+ {
1201
+ "name": "encoder.layer.7.attention.output.LayerNorm.weight",
1202
+ "shape": [
1203
+ 384
1204
+ ],
1205
+ "dtype": "float32",
1206
+ "format": "f32-to-bf16",
1207
+ "nbytes": 768,
1208
+ "byteOffset": 22473984
1209
+ },
1210
+ {
1211
+ "name": "encoder.layer.7.attention.output.dense.bias",
1212
+ "shape": [
1213
+ 384
1214
+ ],
1215
+ "dtype": "float32",
1216
+ "format": "f32-to-bf16",
1217
+ "nbytes": 768,
1218
+ "byteOffset": 22474752
1219
+ },
1220
+ {
1221
+ "name": "encoder.layer.7.attention.output.dense.weight",
1222
+ "shape": [
1223
+ 384,
1224
+ 384
1225
+ ],
1226
+ "dtype": "float32",
1227
+ "format": "f32-to-bf16",
1228
+ "nbytes": 294912,
1229
+ "byteOffset": 22475520
1230
+ },
1231
+ {
1232
+ "name": "encoder.layer.7.attention.self.qkv.bias",
1233
+ "shape": [
1234
+ 1152
1235
+ ],
1236
+ "dtype": "float32",
1237
+ "format": "f32-to-bf16",
1238
+ "nbytes": 2304,
1239
+ "byteOffset": 22770432
1240
+ },
1241
+ {
1242
+ "name": "encoder.layer.7.attention.self.qkv.weight",
1243
+ "shape": [
1244
+ 1152,
1245
+ 384
1246
+ ],
1247
+ "dtype": "float32",
1248
+ "format": "f32-to-bf16",
1249
+ "nbytes": 884736,
1250
+ "byteOffset": 22772736
1251
+ },
1252
+ {
1253
+ "name": "encoder.layer.7.intermediate.dense.bias",
1254
+ "shape": [
1255
+ 1536
1256
+ ],
1257
+ "dtype": "float32",
1258
+ "format": "f32-to-bf16",
1259
+ "nbytes": 3072,
1260
+ "byteOffset": 23657472
1261
+ },
1262
+ {
1263
+ "name": "encoder.layer.7.intermediate.dense.weight",
1264
+ "shape": [
1265
+ 1536,
1266
+ 384
1267
+ ],
1268
+ "dtype": "float32",
1269
+ "format": "f32-to-bf16",
1270
+ "nbytes": 1179648,
1271
+ "byteOffset": 23660544
1272
+ },
1273
+ {
1274
+ "name": "encoder.layer.7.output.LayerNorm.bias",
1275
+ "shape": [
1276
+ 384
1277
+ ],
1278
+ "dtype": "float32",
1279
+ "format": "f32-to-bf16",
1280
+ "nbytes": 768,
1281
+ "byteOffset": 24840192
1282
+ },
1283
+ {
1284
+ "name": "encoder.layer.7.output.LayerNorm.weight",
1285
+ "shape": [
1286
+ 384
1287
+ ],
1288
+ "dtype": "float32",
1289
+ "format": "f32-to-bf16",
1290
+ "nbytes": 768,
1291
+ "byteOffset": 24840960
1292
+ },
1293
+ {
1294
+ "name": "encoder.layer.7.output.dense.bias",
1295
+ "shape": [
1296
+ 384
1297
+ ],
1298
+ "dtype": "float32",
1299
+ "format": "f32-to-bf16",
1300
+ "nbytes": 768,
1301
+ "byteOffset": 24841728
1302
+ },
1303
+ {
1304
+ "name": "encoder.layer.7.output.dense.weight",
1305
+ "shape": [
1306
+ 384,
1307
+ 1536
1308
+ ],
1309
+ "dtype": "float32",
1310
+ "format": "f32-to-bf16",
1311
+ "nbytes": 1179648,
1312
+ "byteOffset": 24842496
1313
+ },
1314
+ {
1315
+ "name": "encoder.layer.8.attention.output.LayerNorm.bias",
1316
+ "shape": [
1317
+ 384
1318
+ ],
1319
+ "dtype": "float32",
1320
+ "format": "f32-to-bf16",
1321
+ "nbytes": 768,
1322
+ "byteOffset": 26022144
1323
+ },
1324
+ {
1325
+ "name": "encoder.layer.8.attention.output.LayerNorm.weight",
1326
+ "shape": [
1327
+ 384
1328
+ ],
1329
+ "dtype": "float32",
1330
+ "format": "f32-to-bf16",
1331
+ "nbytes": 768,
1332
+ "byteOffset": 26022912
1333
+ },
1334
+ {
1335
+ "name": "encoder.layer.8.attention.output.dense.bias",
1336
+ "shape": [
1337
+ 384
1338
+ ],
1339
+ "dtype": "float32",
1340
+ "format": "f32-to-bf16",
1341
+ "nbytes": 768,
1342
+ "byteOffset": 26023680
1343
+ },
1344
+ {
1345
+ "name": "encoder.layer.8.attention.output.dense.weight",
1346
+ "shape": [
1347
+ 384,
1348
+ 384
1349
+ ],
1350
+ "dtype": "float32",
1351
+ "format": "f32-to-bf16",
1352
+ "nbytes": 294912,
1353
+ "byteOffset": 26024448
1354
+ },
1355
+ {
1356
+ "name": "encoder.layer.8.attention.self.qkv.bias",
1357
+ "shape": [
1358
+ 1152
1359
+ ],
1360
+ "dtype": "float32",
1361
+ "format": "f32-to-bf16",
1362
+ "nbytes": 2304,
1363
+ "byteOffset": 26319360
1364
+ },
1365
+ {
1366
+ "name": "encoder.layer.8.attention.self.qkv.weight",
1367
+ "shape": [
1368
+ 1152,
1369
+ 384
1370
+ ],
1371
+ "dtype": "float32",
1372
+ "format": "f32-to-bf16",
1373
+ "nbytes": 884736,
1374
+ "byteOffset": 26321664
1375
+ },
1376
+ {
1377
+ "name": "encoder.layer.8.intermediate.dense.bias",
1378
+ "shape": [
1379
+ 1536
1380
+ ],
1381
+ "dtype": "float32",
1382
+ "format": "f32-to-bf16",
1383
+ "nbytes": 3072,
1384
+ "byteOffset": 27206400
1385
+ },
1386
+ {
1387
+ "name": "encoder.layer.8.intermediate.dense.weight",
1388
+ "shape": [
1389
+ 1536,
1390
+ 384
1391
+ ],
1392
+ "dtype": "float32",
1393
+ "format": "f32-to-bf16",
1394
+ "nbytes": 1179648,
1395
+ "byteOffset": 27209472
1396
+ },
1397
+ {
1398
+ "name": "encoder.layer.8.output.LayerNorm.bias",
1399
+ "shape": [
1400
+ 384
1401
+ ],
1402
+ "dtype": "float32",
1403
+ "format": "f32-to-bf16",
1404
+ "nbytes": 768,
1405
+ "byteOffset": 28389120
1406
+ },
1407
+ {
1408
+ "name": "encoder.layer.8.output.LayerNorm.weight",
1409
+ "shape": [
1410
+ 384
1411
+ ],
1412
+ "dtype": "float32",
1413
+ "format": "f32-to-bf16",
1414
+ "nbytes": 768,
1415
+ "byteOffset": 28389888
1416
+ },
1417
+ {
1418
+ "name": "encoder.layer.8.output.dense.bias",
1419
+ "shape": [
1420
+ 384
1421
+ ],
1422
+ "dtype": "float32",
1423
+ "format": "f32-to-bf16",
1424
+ "nbytes": 768,
1425
+ "byteOffset": 28390656
1426
+ },
1427
+ {
1428
+ "name": "encoder.layer.8.output.dense.weight",
1429
+ "shape": [
1430
+ 384,
1431
+ 1536
1432
+ ],
1433
+ "dtype": "float32",
1434
+ "format": "f32-to-bf16",
1435
+ "nbytes": 1179648,
1436
+ "byteOffset": 28391424
1437
+ },
1438
+ {
1439
+ "name": "encoder.layer.9.attention.output.LayerNorm.bias",
1440
+ "shape": [
1441
+ 384
1442
+ ],
1443
+ "dtype": "float32",
1444
+ "format": "f32-to-bf16",
1445
+ "nbytes": 768,
1446
+ "byteOffset": 29571072
1447
+ },
1448
+ {
1449
+ "name": "encoder.layer.9.attention.output.LayerNorm.weight",
1450
+ "shape": [
1451
+ 384
1452
+ ],
1453
+ "dtype": "float32",
1454
+ "format": "f32-to-bf16",
1455
+ "nbytes": 768,
1456
+ "byteOffset": 29571840
1457
+ },
1458
+ {
1459
+ "name": "encoder.layer.9.attention.output.dense.bias",
1460
+ "shape": [
1461
+ 384
1462
+ ],
1463
+ "dtype": "float32",
1464
+ "format": "f32-to-bf16",
1465
+ "nbytes": 768,
1466
+ "byteOffset": 29572608
1467
+ },
1468
+ {
1469
+ "name": "encoder.layer.9.attention.output.dense.weight",
1470
+ "shape": [
1471
+ 384,
1472
+ 384
1473
+ ],
1474
+ "dtype": "float32",
1475
+ "format": "f32-to-bf16",
1476
+ "nbytes": 294912,
1477
+ "byteOffset": 29573376
1478
+ },
1479
+ {
1480
+ "name": "encoder.layer.9.attention.self.qkv.bias",
1481
+ "shape": [
1482
+ 1152
1483
+ ],
1484
+ "dtype": "float32",
1485
+ "format": "f32-to-bf16",
1486
+ "nbytes": 2304,
1487
+ "byteOffset": 29868288
1488
+ },
1489
+ {
1490
+ "name": "encoder.layer.9.attention.self.qkv.weight",
1491
+ "shape": [
1492
+ 1152,
1493
+ 384
1494
+ ],
1495
+ "dtype": "float32",
1496
+ "format": "f32-to-bf16",
1497
+ "nbytes": 884736,
1498
+ "byteOffset": 29870592
1499
+ },
1500
+ {
1501
+ "name": "encoder.layer.9.intermediate.dense.bias",
1502
+ "shape": [
1503
+ 1536
1504
+ ],
1505
+ "dtype": "float32",
1506
+ "format": "f32-to-bf16",
1507
+ "nbytes": 3072,
1508
+ "byteOffset": 30755328
1509
+ },
1510
+ {
1511
+ "name": "encoder.layer.9.intermediate.dense.weight",
1512
+ "shape": [
1513
+ 1536,
1514
+ 384
1515
+ ],
1516
+ "dtype": "float32",
1517
+ "format": "f32-to-bf16",
1518
+ "nbytes": 1179648,
1519
+ "byteOffset": 30758400
1520
+ },
1521
+ {
1522
+ "name": "encoder.layer.9.output.LayerNorm.bias",
1523
+ "shape": [
1524
+ 384
1525
+ ],
1526
+ "dtype": "float32",
1527
+ "format": "f32-to-bf16",
1528
+ "nbytes": 768,
1529
+ "byteOffset": 31938048
1530
+ },
1531
+ {
1532
+ "name": "encoder.layer.9.output.LayerNorm.weight",
1533
+ "shape": [
1534
+ 384
1535
+ ],
1536
+ "dtype": "float32",
1537
+ "format": "f32-to-bf16",
1538
+ "nbytes": 768,
1539
+ "byteOffset": 31938816
1540
+ },
1541
+ {
1542
+ "name": "encoder.layer.9.output.dense.bias",
1543
+ "shape": [
1544
+ 384
1545
+ ],
1546
+ "dtype": "float32",
1547
+ "format": "f32-to-bf16",
1548
+ "nbytes": 768,
1549
+ "byteOffset": 31939584
1550
+ },
1551
+ {
1552
+ "name": "encoder.layer.9.output.dense.weight",
1553
+ "shape": [
1554
+ 384,
1555
+ 1536
1556
+ ],
1557
+ "dtype": "float32",
1558
+ "format": "f32-to-bf16",
1559
+ "nbytes": 1179648,
1560
+ "byteOffset": 31940352
1561
+ }
1562
+ ],
1563
+ "md5sum": "4f150cdc4aff3c33d8a38c07d140b60b"
1564
+ }
1565
+ ]
1566
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97f38e6eeb5d58d0af69c1cbb6f2f16a6a7d9ebc2b891c817b7c07b2bf4066f8
3
+ size 33304320
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dcd2c02b61eb662c24419559a392139f9933f5f053dd525c5d3fa09761e1ef7
3
+ size 33120000
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_length": 512,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "[PAD]",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "[SEP]",
57
+ "stride": 0,
58
+ "strip_accents": null,
59
+ "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "BertTokenizer",
61
+ "truncation_side": "right",
62
+ "truncation_strategy": "longest_first",
63
+ "unk_token": "[UNK]"
64
+ }