d-matrix committed on
Commit
b99633c
·
verified ·
1 Parent(s): ffaee51

Delete configs/BASIC.yaml

Browse files
Files changed (1) hide show
  1. configs/BASIC.yaml +0 -2832
configs/BASIC.yaml DELETED
@@ -1,2832 +0,0 @@
1
- model:
2
- lm_head:
3
- accum_format: SAME
4
- approximation_function: NONE
5
- input_format: SAME
6
- instance: Linear
7
- output_format: SAME
8
- weight_format: SAME
9
- weight_sparseness: DENSE
10
- model.decoder.final_layer_norm:
11
- approximation_function: NONE
12
- bias_format: SAME
13
- input_format: SAME
14
- instance: LayerNorm
15
- output_format: SAME
16
- weight_format: SAME
17
- model.decoder.layers.0.activation_fn:
18
- approximation_function: NONE
19
- input_format: SAME
20
- instance: ReLU
21
- output_format: SAME
22
- model.decoder.layers.0.dropout:
23
- approximation_function: NONE
24
- input_format: SAME
25
- instance: Dropout
26
- output_format: SAME
27
- model.decoder.layers.0.fc1:
28
- accum_format: SAME
29
- approximation_function: NONE
30
- bias_format: SAME
31
- input_format: BFP[8|8]{64,-1}(SN)
32
- instance: Linear
33
- output_format: SAME
34
- weight_format: BFP[8|8]{64,-1}(SN)
35
- weight_sparseness: DENSE
36
- model.decoder.layers.0.fc2:
37
- accum_format: SAME
38
- approximation_function: NONE
39
- bias_format: SAME
40
- input_format: BFP[8|8]{64,-1}(SN)
41
- instance: Linear
42
- output_format: SAME
43
- weight_format: BFP[8|8]{64,-1}(SN)
44
- weight_sparseness: DENSE
45
- model.decoder.layers.0.final_layer_norm:
46
- approximation_function: NONE
47
- bias_format: SAME
48
- input_format: SAME
49
- instance: LayerNorm
50
- output_format: SAME
51
- weight_format: SAME
52
- model.decoder.layers.0.self_attn.dropout:
53
- approximation_function: NONE
54
- input_format: SAME
55
- instance: Dropout
56
- output_format: BFP[8|8]{64,-1}(SN)
57
- model.decoder.layers.0.self_attn.k_proj:
58
- accum_format: SAME
59
- approximation_function: NONE
60
- bias_format: SAME
61
- input_format: BFP[8|8]{64,-1}(SN)
62
- instance: Linear
63
- output_format: BFP[8|8]{64,-1}(SN)
64
- weight_format: BFP[8|8]{64,-1}(SN)
65
- weight_sparseness: DENSE
66
- model.decoder.layers.0.self_attn.out_proj:
67
- accum_format: SAME
68
- approximation_function: NONE
69
- bias_format: SAME
70
- input_format: BFP[8|8]{64,-1}(SN)
71
- instance: Linear
72
- output_format: SAME
73
- weight_format: BFP[8|8]{64,-1}(SN)
74
- weight_sparseness: DENSE
75
- model.decoder.layers.0.self_attn.q_proj:
76
- accum_format: SAME
77
- approximation_function: NONE
78
- bias_format: SAME
79
- input_format: BFP[8|8]{64,-1}(SN)
80
- instance: Linear
81
- output_format: BFP[8|8]{64,-1}(SN)
82
- weight_format: BFP[8|8]{64,-1}(SN)
83
- weight_sparseness: DENSE
84
- model.decoder.layers.0.self_attn.softmax:
85
- approximation_function: NONE
86
- input_format: SAME
87
- instance: Softmax
88
- output_format: SAME
89
- model.decoder.layers.0.self_attn.v_proj:
90
- accum_format: SAME
91
- approximation_function: NONE
92
- bias_format: SAME
93
- input_format: BFP[8|8]{64,-1}(SN)
94
- instance: Linear
95
- output_format: BFP[8|8]{64,-1}(SN)
96
- weight_format: BFP[8|8]{64,-1}(SN)
97
- weight_sparseness: DENSE
98
- model.decoder.layers.0.self_attn_layer_norm:
99
- approximation_function: NONE
100
- bias_format: SAME
101
- input_format: SAME
102
- instance: LayerNorm
103
- output_format: SAME
104
- weight_format: SAME
105
- model.decoder.layers.1.activation_fn:
106
- approximation_function: NONE
107
- input_format: SAME
108
- instance: ReLU
109
- output_format: SAME
110
- model.decoder.layers.1.dropout:
111
- approximation_function: NONE
112
- input_format: SAME
113
- instance: Dropout
114
- output_format: SAME
115
- model.decoder.layers.1.fc1:
116
- accum_format: SAME
117
- approximation_function: NONE
118
- bias_format: SAME
119
- input_format: BFP[8|8]{64,-1}(SN)
120
- instance: Linear
121
- output_format: SAME
122
- weight_format: BFP[8|8]{64,-1}(SN)
123
- weight_sparseness: DENSE
124
- model.decoder.layers.1.fc2:
125
- accum_format: SAME
126
- approximation_function: NONE
127
- bias_format: SAME
128
- input_format: BFP[8|8]{64,-1}(SN)
129
- instance: Linear
130
- output_format: SAME
131
- weight_format: BFP[8|8]{64,-1}(SN)
132
- weight_sparseness: DENSE
133
- model.decoder.layers.1.final_layer_norm:
134
- approximation_function: NONE
135
- bias_format: SAME
136
- input_format: SAME
137
- instance: LayerNorm
138
- output_format: SAME
139
- weight_format: SAME
140
- model.decoder.layers.1.self_attn.dropout:
141
- approximation_function: NONE
142
- input_format: SAME
143
- instance: Dropout
144
- output_format: BFP[8|8]{64,-1}(SN)
145
- model.decoder.layers.1.self_attn.k_proj:
146
- accum_format: SAME
147
- approximation_function: NONE
148
- bias_format: SAME
149
- input_format: BFP[8|8]{64,-1}(SN)
150
- instance: Linear
151
- output_format: BFP[8|8]{64,-1}(SN)
152
- weight_format: BFP[8|8]{64,-1}(SN)
153
- weight_sparseness: DENSE
154
- model.decoder.layers.1.self_attn.out_proj:
155
- accum_format: SAME
156
- approximation_function: NONE
157
- bias_format: SAME
158
- input_format: BFP[8|8]{64,-1}(SN)
159
- instance: Linear
160
- output_format: SAME
161
- weight_format: BFP[8|8]{64,-1}(SN)
162
- weight_sparseness: DENSE
163
- model.decoder.layers.1.self_attn.q_proj:
164
- accum_format: SAME
165
- approximation_function: NONE
166
- bias_format: SAME
167
- input_format: BFP[8|8]{64,-1}(SN)
168
- instance: Linear
169
- output_format: BFP[8|8]{64,-1}(SN)
170
- weight_format: BFP[8|8]{64,-1}(SN)
171
- weight_sparseness: DENSE
172
- model.decoder.layers.1.self_attn.softmax:
173
- approximation_function: NONE
174
- input_format: SAME
175
- instance: Softmax
176
- output_format: SAME
177
- model.decoder.layers.1.self_attn.v_proj:
178
- accum_format: SAME
179
- approximation_function: NONE
180
- bias_format: SAME
181
- input_format: BFP[8|8]{64,-1}(SN)
182
- instance: Linear
183
- output_format: BFP[8|8]{64,-1}(SN)
184
- weight_format: BFP[8|8]{64,-1}(SN)
185
- weight_sparseness: DENSE
186
- model.decoder.layers.1.self_attn_layer_norm:
187
- approximation_function: NONE
188
- bias_format: SAME
189
- input_format: SAME
190
- instance: LayerNorm
191
- output_format: SAME
192
- weight_format: SAME
193
- model.decoder.layers.10.activation_fn:
194
- approximation_function: NONE
195
- input_format: SAME
196
- instance: ReLU
197
- output_format: SAME
198
- model.decoder.layers.10.dropout:
199
- approximation_function: NONE
200
- input_format: SAME
201
- instance: Dropout
202
- output_format: SAME
203
- model.decoder.layers.10.fc1:
204
- accum_format: SAME
205
- approximation_function: NONE
206
- bias_format: SAME
207
- input_format: BFP[8|8]{64,-1}(SN)
208
- instance: Linear
209
- output_format: SAME
210
- weight_format: BFP[8|8]{64,-1}(SN)
211
- weight_sparseness: DENSE
212
- model.decoder.layers.10.fc2:
213
- accum_format: SAME
214
- approximation_function: NONE
215
- bias_format: SAME
216
- input_format: BFP[8|8]{64,-1}(SN)
217
- instance: Linear
218
- output_format: SAME
219
- weight_format: BFP[8|8]{64,-1}(SN)
220
- weight_sparseness: DENSE
221
- model.decoder.layers.10.final_layer_norm:
222
- approximation_function: NONE
223
- bias_format: SAME
224
- input_format: SAME
225
- instance: LayerNorm
226
- output_format: SAME
227
- weight_format: SAME
228
- model.decoder.layers.10.self_attn.dropout:
229
- approximation_function: NONE
230
- input_format: SAME
231
- instance: Dropout
232
- output_format: BFP[8|8]{64,-1}(SN)
233
- model.decoder.layers.10.self_attn.k_proj:
234
- accum_format: SAME
235
- approximation_function: NONE
236
- bias_format: SAME
237
- input_format: BFP[8|8]{64,-1}(SN)
238
- instance: Linear
239
- output_format: BFP[8|8]{64,-1}(SN)
240
- weight_format: BFP[8|8]{64,-1}(SN)
241
- weight_sparseness: DENSE
242
- model.decoder.layers.10.self_attn.out_proj:
243
- accum_format: SAME
244
- approximation_function: NONE
245
- bias_format: SAME
246
- input_format: BFP[8|8]{64,-1}(SN)
247
- instance: Linear
248
- output_format: SAME
249
- weight_format: BFP[8|8]{64,-1}(SN)
250
- weight_sparseness: DENSE
251
- model.decoder.layers.10.self_attn.q_proj:
252
- accum_format: SAME
253
- approximation_function: NONE
254
- bias_format: SAME
255
- input_format: BFP[8|8]{64,-1}(SN)
256
- instance: Linear
257
- output_format: BFP[8|8]{64,-1}(SN)
258
- weight_format: BFP[8|8]{64,-1}(SN)
259
- weight_sparseness: DENSE
260
- model.decoder.layers.10.self_attn.softmax:
261
- approximation_function: NONE
262
- input_format: SAME
263
- instance: Softmax
264
- output_format: SAME
265
- model.decoder.layers.10.self_attn.v_proj:
266
- accum_format: SAME
267
- approximation_function: NONE
268
- bias_format: SAME
269
- input_format: BFP[8|8]{64,-1}(SN)
270
- instance: Linear
271
- output_format: BFP[8|8]{64,-1}(SN)
272
- weight_format: BFP[8|8]{64,-1}(SN)
273
- weight_sparseness: DENSE
274
- model.decoder.layers.10.self_attn_layer_norm:
275
- approximation_function: NONE
276
- bias_format: SAME
277
- input_format: SAME
278
- instance: LayerNorm
279
- output_format: SAME
280
- weight_format: SAME
281
- model.decoder.layers.11.activation_fn:
282
- approximation_function: NONE
283
- input_format: SAME
284
- instance: ReLU
285
- output_format: SAME
286
- model.decoder.layers.11.dropout:
287
- approximation_function: NONE
288
- input_format: SAME
289
- instance: Dropout
290
- output_format: SAME
291
- model.decoder.layers.11.fc1:
292
- accum_format: SAME
293
- approximation_function: NONE
294
- bias_format: SAME
295
- input_format: BFP[8|8]{64,-1}(SN)
296
- instance: Linear
297
- output_format: SAME
298
- weight_format: BFP[8|8]{64,-1}(SN)
299
- weight_sparseness: DENSE
300
- model.decoder.layers.11.fc2:
301
- accum_format: SAME
302
- approximation_function: NONE
303
- bias_format: SAME
304
- input_format: BFP[8|8]{64,-1}(SN)
305
- instance: Linear
306
- output_format: SAME
307
- weight_format: BFP[8|8]{64,-1}(SN)
308
- weight_sparseness: DENSE
309
- model.decoder.layers.11.final_layer_norm:
310
- approximation_function: NONE
311
- bias_format: SAME
312
- input_format: SAME
313
- instance: LayerNorm
314
- output_format: SAME
315
- weight_format: SAME
316
- model.decoder.layers.11.self_attn.dropout:
317
- approximation_function: NONE
318
- input_format: SAME
319
- instance: Dropout
320
- output_format: BFP[8|8]{64,-1}(SN)
321
- model.decoder.layers.11.self_attn.k_proj:
322
- accum_format: SAME
323
- approximation_function: NONE
324
- bias_format: SAME
325
- input_format: BFP[8|8]{64,-1}(SN)
326
- instance: Linear
327
- output_format: BFP[8|8]{64,-1}(SN)
328
- weight_format: BFP[8|8]{64,-1}(SN)
329
- weight_sparseness: DENSE
330
- model.decoder.layers.11.self_attn.out_proj:
331
- accum_format: SAME
332
- approximation_function: NONE
333
- bias_format: SAME
334
- input_format: BFP[8|8]{64,-1}(SN)
335
- instance: Linear
336
- output_format: SAME
337
- weight_format: BFP[8|8]{64,-1}(SN)
338
- weight_sparseness: DENSE
339
- model.decoder.layers.11.self_attn.q_proj:
340
- accum_format: SAME
341
- approximation_function: NONE
342
- bias_format: SAME
343
- input_format: BFP[8|8]{64,-1}(SN)
344
- instance: Linear
345
- output_format: BFP[8|8]{64,-1}(SN)
346
- weight_format: BFP[8|8]{64,-1}(SN)
347
- weight_sparseness: DENSE
348
- model.decoder.layers.11.self_attn.softmax:
349
- approximation_function: NONE
350
- input_format: SAME
351
- instance: Softmax
352
- output_format: SAME
353
- model.decoder.layers.11.self_attn.v_proj:
354
- accum_format: SAME
355
- approximation_function: NONE
356
- bias_format: SAME
357
- input_format: BFP[8|8]{64,-1}(SN)
358
- instance: Linear
359
- output_format: BFP[8|8]{64,-1}(SN)
360
- weight_format: BFP[8|8]{64,-1}(SN)
361
- weight_sparseness: DENSE
362
- model.decoder.layers.11.self_attn_layer_norm:
363
- approximation_function: NONE
364
- bias_format: SAME
365
- input_format: SAME
366
- instance: LayerNorm
367
- output_format: SAME
368
- weight_format: SAME
369
- model.decoder.layers.12.activation_fn:
370
- approximation_function: NONE
371
- input_format: SAME
372
- instance: ReLU
373
- output_format: SAME
374
- model.decoder.layers.12.dropout:
375
- approximation_function: NONE
376
- input_format: SAME
377
- instance: Dropout
378
- output_format: SAME
379
- model.decoder.layers.12.fc1:
380
- accum_format: SAME
381
- approximation_function: NONE
382
- bias_format: SAME
383
- input_format: BFP[8|8]{64,-1}(SN)
384
- instance: Linear
385
- output_format: SAME
386
- weight_format: BFP[8|8]{64,-1}(SN)
387
- weight_sparseness: DENSE
388
- model.decoder.layers.12.fc2:
389
- accum_format: SAME
390
- approximation_function: NONE
391
- bias_format: SAME
392
- input_format: BFP[8|8]{64,-1}(SN)
393
- instance: Linear
394
- output_format: SAME
395
- weight_format: BFP[8|8]{64,-1}(SN)
396
- weight_sparseness: DENSE
397
- model.decoder.layers.12.final_layer_norm:
398
- approximation_function: NONE
399
- bias_format: SAME
400
- input_format: SAME
401
- instance: LayerNorm
402
- output_format: SAME
403
- weight_format: SAME
404
- model.decoder.layers.12.self_attn.dropout:
405
- approximation_function: NONE
406
- input_format: SAME
407
- instance: Dropout
408
- output_format: BFP[8|8]{64,-1}(SN)
409
- model.decoder.layers.12.self_attn.k_proj:
410
- accum_format: SAME
411
- approximation_function: NONE
412
- bias_format: SAME
413
- input_format: BFP[8|8]{64,-1}(SN)
414
- instance: Linear
415
- output_format: BFP[8|8]{64,-1}(SN)
416
- weight_format: BFP[8|8]{64,-1}(SN)
417
- weight_sparseness: DENSE
418
- model.decoder.layers.12.self_attn.out_proj:
419
- accum_format: SAME
420
- approximation_function: NONE
421
- bias_format: SAME
422
- input_format: BFP[8|8]{64,-1}(SN)
423
- instance: Linear
424
- output_format: SAME
425
- weight_format: BFP[8|8]{64,-1}(SN)
426
- weight_sparseness: DENSE
427
- model.decoder.layers.12.self_attn.q_proj:
428
- accum_format: SAME
429
- approximation_function: NONE
430
- bias_format: SAME
431
- input_format: BFP[8|8]{64,-1}(SN)
432
- instance: Linear
433
- output_format: BFP[8|8]{64,-1}(SN)
434
- weight_format: BFP[8|8]{64,-1}(SN)
435
- weight_sparseness: DENSE
436
- model.decoder.layers.12.self_attn.softmax:
437
- approximation_function: NONE
438
- input_format: SAME
439
- instance: Softmax
440
- output_format: SAME
441
- model.decoder.layers.12.self_attn.v_proj:
442
- accum_format: SAME
443
- approximation_function: NONE
444
- bias_format: SAME
445
- input_format: BFP[8|8]{64,-1}(SN)
446
- instance: Linear
447
- output_format: BFP[8|8]{64,-1}(SN)
448
- weight_format: BFP[8|8]{64,-1}(SN)
449
- weight_sparseness: DENSE
450
- model.decoder.layers.12.self_attn_layer_norm:
451
- approximation_function: NONE
452
- bias_format: SAME
453
- input_format: SAME
454
- instance: LayerNorm
455
- output_format: SAME
456
- weight_format: SAME
457
- model.decoder.layers.13.activation_fn:
458
- approximation_function: NONE
459
- input_format: SAME
460
- instance: ReLU
461
- output_format: SAME
462
- model.decoder.layers.13.dropout:
463
- approximation_function: NONE
464
- input_format: SAME
465
- instance: Dropout
466
- output_format: SAME
467
- model.decoder.layers.13.fc1:
468
- accum_format: SAME
469
- approximation_function: NONE
470
- bias_format: SAME
471
- input_format: BFP[8|8]{64,-1}(SN)
472
- instance: Linear
473
- output_format: SAME
474
- weight_format: BFP[8|8]{64,-1}(SN)
475
- weight_sparseness: DENSE
476
- model.decoder.layers.13.fc2:
477
- accum_format: SAME
478
- approximation_function: NONE
479
- bias_format: SAME
480
- input_format: BFP[8|8]{64,-1}(SN)
481
- instance: Linear
482
- output_format: SAME
483
- weight_format: BFP[8|8]{64,-1}(SN)
484
- weight_sparseness: DENSE
485
- model.decoder.layers.13.final_layer_norm:
486
- approximation_function: NONE
487
- bias_format: SAME
488
- input_format: SAME
489
- instance: LayerNorm
490
- output_format: SAME
491
- weight_format: SAME
492
- model.decoder.layers.13.self_attn.dropout:
493
- approximation_function: NONE
494
- input_format: SAME
495
- instance: Dropout
496
- output_format: BFP[8|8]{64,-1}(SN)
497
- model.decoder.layers.13.self_attn.k_proj:
498
- accum_format: SAME
499
- approximation_function: NONE
500
- bias_format: SAME
501
- input_format: BFP[8|8]{64,-1}(SN)
502
- instance: Linear
503
- output_format: BFP[8|8]{64,-1}(SN)
504
- weight_format: BFP[8|8]{64,-1}(SN)
505
- weight_sparseness: DENSE
506
- model.decoder.layers.13.self_attn.out_proj:
507
- accum_format: SAME
508
- approximation_function: NONE
509
- bias_format: SAME
510
- input_format: BFP[8|8]{64,-1}(SN)
511
- instance: Linear
512
- output_format: SAME
513
- weight_format: BFP[8|8]{64,-1}(SN)
514
- weight_sparseness: DENSE
515
- model.decoder.layers.13.self_attn.q_proj:
516
- accum_format: SAME
517
- approximation_function: NONE
518
- bias_format: SAME
519
- input_format: BFP[8|8]{64,-1}(SN)
520
- instance: Linear
521
- output_format: BFP[8|8]{64,-1}(SN)
522
- weight_format: BFP[8|8]{64,-1}(SN)
523
- weight_sparseness: DENSE
524
- model.decoder.layers.13.self_attn.softmax:
525
- approximation_function: NONE
526
- input_format: SAME
527
- instance: Softmax
528
- output_format: SAME
529
- model.decoder.layers.13.self_attn.v_proj:
530
- accum_format: SAME
531
- approximation_function: NONE
532
- bias_format: SAME
533
- input_format: BFP[8|8]{64,-1}(SN)
534
- instance: Linear
535
- output_format: BFP[8|8]{64,-1}(SN)
536
- weight_format: BFP[8|8]{64,-1}(SN)
537
- weight_sparseness: DENSE
538
- model.decoder.layers.13.self_attn_layer_norm:
539
- approximation_function: NONE
540
- bias_format: SAME
541
- input_format: SAME
542
- instance: LayerNorm
543
- output_format: SAME
544
- weight_format: SAME
545
- model.decoder.layers.14.activation_fn:
546
- approximation_function: NONE
547
- input_format: SAME
548
- instance: ReLU
549
- output_format: SAME
550
- model.decoder.layers.14.dropout:
551
- approximation_function: NONE
552
- input_format: SAME
553
- instance: Dropout
554
- output_format: SAME
555
- model.decoder.layers.14.fc1:
556
- accum_format: SAME
557
- approximation_function: NONE
558
- bias_format: SAME
559
- input_format: BFP[8|8]{64,-1}(SN)
560
- instance: Linear
561
- output_format: SAME
562
- weight_format: BFP[8|8]{64,-1}(SN)
563
- weight_sparseness: DENSE
564
- model.decoder.layers.14.fc2:
565
- accum_format: SAME
566
- approximation_function: NONE
567
- bias_format: SAME
568
- input_format: BFP[8|8]{64,-1}(SN)
569
- instance: Linear
570
- output_format: SAME
571
- weight_format: BFP[8|8]{64,-1}(SN)
572
- weight_sparseness: DENSE
573
- model.decoder.layers.14.final_layer_norm:
574
- approximation_function: NONE
575
- bias_format: SAME
576
- input_format: SAME
577
- instance: LayerNorm
578
- output_format: SAME
579
- weight_format: SAME
580
- model.decoder.layers.14.self_attn.dropout:
581
- approximation_function: NONE
582
- input_format: SAME
583
- instance: Dropout
584
- output_format: BFP[8|8]{64,-1}(SN)
585
- model.decoder.layers.14.self_attn.k_proj:
586
- accum_format: SAME
587
- approximation_function: NONE
588
- bias_format: SAME
589
- input_format: BFP[8|8]{64,-1}(SN)
590
- instance: Linear
591
- output_format: BFP[8|8]{64,-1}(SN)
592
- weight_format: BFP[8|8]{64,-1}(SN)
593
- weight_sparseness: DENSE
594
- model.decoder.layers.14.self_attn.out_proj:
595
- accum_format: SAME
596
- approximation_function: NONE
597
- bias_format: SAME
598
- input_format: BFP[8|8]{64,-1}(SN)
599
- instance: Linear
600
- output_format: SAME
601
- weight_format: BFP[8|8]{64,-1}(SN)
602
- weight_sparseness: DENSE
603
- model.decoder.layers.14.self_attn.q_proj:
604
- accum_format: SAME
605
- approximation_function: NONE
606
- bias_format: SAME
607
- input_format: BFP[8|8]{64,-1}(SN)
608
- instance: Linear
609
- output_format: BFP[8|8]{64,-1}(SN)
610
- weight_format: BFP[8|8]{64,-1}(SN)
611
- weight_sparseness: DENSE
612
- model.decoder.layers.14.self_attn.softmax:
613
- approximation_function: NONE
614
- input_format: SAME
615
- instance: Softmax
616
- output_format: SAME
617
- model.decoder.layers.14.self_attn.v_proj:
618
- accum_format: SAME
619
- approximation_function: NONE
620
- bias_format: SAME
621
- input_format: BFP[8|8]{64,-1}(SN)
622
- instance: Linear
623
- output_format: BFP[8|8]{64,-1}(SN)
624
- weight_format: BFP[8|8]{64,-1}(SN)
625
- weight_sparseness: DENSE
626
- model.decoder.layers.14.self_attn_layer_norm:
627
- approximation_function: NONE
628
- bias_format: SAME
629
- input_format: SAME
630
- instance: LayerNorm
631
- output_format: SAME
632
- weight_format: SAME
633
- model.decoder.layers.15.activation_fn:
634
- approximation_function: NONE
635
- input_format: SAME
636
- instance: ReLU
637
- output_format: SAME
638
- model.decoder.layers.15.dropout:
639
- approximation_function: NONE
640
- input_format: SAME
641
- instance: Dropout
642
- output_format: SAME
643
- model.decoder.layers.15.fc1:
644
- accum_format: SAME
645
- approximation_function: NONE
646
- bias_format: SAME
647
- input_format: BFP[8|8]{64,-1}(SN)
648
- instance: Linear
649
- output_format: SAME
650
- weight_format: BFP[8|8]{64,-1}(SN)
651
- weight_sparseness: DENSE
652
- model.decoder.layers.15.fc2:
653
- accum_format: SAME
654
- approximation_function: NONE
655
- bias_format: SAME
656
- input_format: BFP[8|8]{64,-1}(SN)
657
- instance: Linear
658
- output_format: SAME
659
- weight_format: BFP[8|8]{64,-1}(SN)
660
- weight_sparseness: DENSE
661
- model.decoder.layers.15.final_layer_norm:
662
- approximation_function: NONE
663
- bias_format: SAME
664
- input_format: SAME
665
- instance: LayerNorm
666
- output_format: SAME
667
- weight_format: SAME
668
- model.decoder.layers.15.self_attn.dropout:
669
- approximation_function: NONE
670
- input_format: SAME
671
- instance: Dropout
672
- output_format: BFP[8|8]{64,-1}(SN)
673
- model.decoder.layers.15.self_attn.k_proj:
674
- accum_format: SAME
675
- approximation_function: NONE
676
- bias_format: SAME
677
- input_format: BFP[8|8]{64,-1}(SN)
678
- instance: Linear
679
- output_format: BFP[8|8]{64,-1}(SN)
680
- weight_format: BFP[8|8]{64,-1}(SN)
681
- weight_sparseness: DENSE
682
- model.decoder.layers.15.self_attn.out_proj:
683
- accum_format: SAME
684
- approximation_function: NONE
685
- bias_format: SAME
686
- input_format: BFP[8|8]{64,-1}(SN)
687
- instance: Linear
688
- output_format: SAME
689
- weight_format: BFP[8|8]{64,-1}(SN)
690
- weight_sparseness: DENSE
691
- model.decoder.layers.15.self_attn.q_proj:
692
- accum_format: SAME
693
- approximation_function: NONE
694
- bias_format: SAME
695
- input_format: BFP[8|8]{64,-1}(SN)
696
- instance: Linear
697
- output_format: BFP[8|8]{64,-1}(SN)
698
- weight_format: BFP[8|8]{64,-1}(SN)
699
- weight_sparseness: DENSE
700
- model.decoder.layers.15.self_attn.softmax:
701
- approximation_function: NONE
702
- input_format: SAME
703
- instance: Softmax
704
- output_format: SAME
705
- model.decoder.layers.15.self_attn.v_proj:
706
- accum_format: SAME
707
- approximation_function: NONE
708
- bias_format: SAME
709
- input_format: BFP[8|8]{64,-1}(SN)
710
- instance: Linear
711
- output_format: BFP[8|8]{64,-1}(SN)
712
- weight_format: BFP[8|8]{64,-1}(SN)
713
- weight_sparseness: DENSE
714
- model.decoder.layers.15.self_attn_layer_norm:
715
- approximation_function: NONE
716
- bias_format: SAME
717
- input_format: SAME
718
- instance: LayerNorm
719
- output_format: SAME
720
- weight_format: SAME
721
- model.decoder.layers.16.activation_fn:
722
- approximation_function: NONE
723
- input_format: SAME
724
- instance: ReLU
725
- output_format: SAME
726
- model.decoder.layers.16.dropout:
727
- approximation_function: NONE
728
- input_format: SAME
729
- instance: Dropout
730
- output_format: SAME
731
- model.decoder.layers.16.fc1:
732
- accum_format: SAME
733
- approximation_function: NONE
734
- bias_format: SAME
735
- input_format: BFP[8|8]{64,-1}(SN)
736
- instance: Linear
737
- output_format: SAME
738
- weight_format: BFP[8|8]{64,-1}(SN)
739
- weight_sparseness: DENSE
740
- model.decoder.layers.16.fc2:
741
- accum_format: SAME
742
- approximation_function: NONE
743
- bias_format: SAME
744
- input_format: BFP[8|8]{64,-1}(SN)
745
- instance: Linear
746
- output_format: SAME
747
- weight_format: BFP[8|8]{64,-1}(SN)
748
- weight_sparseness: DENSE
749
- model.decoder.layers.16.final_layer_norm:
750
- approximation_function: NONE
751
- bias_format: SAME
752
- input_format: SAME
753
- instance: LayerNorm
754
- output_format: SAME
755
- weight_format: SAME
756
- model.decoder.layers.16.self_attn.dropout:
757
- approximation_function: NONE
758
- input_format: SAME
759
- instance: Dropout
760
- output_format: BFP[8|8]{64,-1}(SN)
761
- model.decoder.layers.16.self_attn.k_proj:
762
- accum_format: SAME
763
- approximation_function: NONE
764
- bias_format: SAME
765
- input_format: BFP[8|8]{64,-1}(SN)
766
- instance: Linear
767
- output_format: BFP[8|8]{64,-1}(SN)
768
- weight_format: BFP[8|8]{64,-1}(SN)
769
- weight_sparseness: DENSE
770
- model.decoder.layers.16.self_attn.out_proj:
771
- accum_format: SAME
772
- approximation_function: NONE
773
- bias_format: SAME
774
- input_format: BFP[8|8]{64,-1}(SN)
775
- instance: Linear
776
- output_format: SAME
777
- weight_format: BFP[8|8]{64,-1}(SN)
778
- weight_sparseness: DENSE
779
- model.decoder.layers.16.self_attn.q_proj:
780
- accum_format: SAME
781
- approximation_function: NONE
782
- bias_format: SAME
783
- input_format: BFP[8|8]{64,-1}(SN)
784
- instance: Linear
785
- output_format: BFP[8|8]{64,-1}(SN)
786
- weight_format: BFP[8|8]{64,-1}(SN)
787
- weight_sparseness: DENSE
788
- model.decoder.layers.16.self_attn.softmax:
789
- approximation_function: NONE
790
- input_format: SAME
791
- instance: Softmax
792
- output_format: SAME
793
- model.decoder.layers.16.self_attn.v_proj:
794
- accum_format: SAME
795
- approximation_function: NONE
796
- bias_format: SAME
797
- input_format: BFP[8|8]{64,-1}(SN)
798
- instance: Linear
799
- output_format: BFP[8|8]{64,-1}(SN)
800
- weight_format: BFP[8|8]{64,-1}(SN)
801
- weight_sparseness: DENSE
802
- model.decoder.layers.16.self_attn_layer_norm:
803
- approximation_function: NONE
804
- bias_format: SAME
805
- input_format: SAME
806
- instance: LayerNorm
807
- output_format: SAME
808
- weight_format: SAME
809
- model.decoder.layers.17.activation_fn:
810
- approximation_function: NONE
811
- input_format: SAME
812
- instance: ReLU
813
- output_format: SAME
814
- model.decoder.layers.17.dropout:
815
- approximation_function: NONE
816
- input_format: SAME
817
- instance: Dropout
818
- output_format: SAME
819
- model.decoder.layers.17.fc1:
820
- accum_format: SAME
821
- approximation_function: NONE
822
- bias_format: SAME
823
- input_format: BFP[8|8]{64,-1}(SN)
824
- instance: Linear
825
- output_format: SAME
826
- weight_format: BFP[8|8]{64,-1}(SN)
827
- weight_sparseness: DENSE
828
- model.decoder.layers.17.fc2:
829
- accum_format: SAME
830
- approximation_function: NONE
831
- bias_format: SAME
832
- input_format: BFP[8|8]{64,-1}(SN)
833
- instance: Linear
834
- output_format: SAME
835
- weight_format: BFP[8|8]{64,-1}(SN)
836
- weight_sparseness: DENSE
837
- model.decoder.layers.17.final_layer_norm:
838
- approximation_function: NONE
839
- bias_format: SAME
840
- input_format: SAME
841
- instance: LayerNorm
842
- output_format: SAME
843
- weight_format: SAME
844
- model.decoder.layers.17.self_attn.dropout:
845
- approximation_function: NONE
846
- input_format: SAME
847
- instance: Dropout
848
- output_format: BFP[8|8]{64,-1}(SN)
849
- model.decoder.layers.17.self_attn.k_proj:
850
- accum_format: SAME
851
- approximation_function: NONE
852
- bias_format: SAME
853
- input_format: BFP[8|8]{64,-1}(SN)
854
- instance: Linear
855
- output_format: BFP[8|8]{64,-1}(SN)
856
- weight_format: BFP[8|8]{64,-1}(SN)
857
- weight_sparseness: DENSE
858
- model.decoder.layers.17.self_attn.out_proj:
859
- accum_format: SAME
860
- approximation_function: NONE
861
- bias_format: SAME
862
- input_format: BFP[8|8]{64,-1}(SN)
863
- instance: Linear
864
- output_format: SAME
865
- weight_format: BFP[8|8]{64,-1}(SN)
866
- weight_sparseness: DENSE
867
- model.decoder.layers.17.self_attn.q_proj:
868
- accum_format: SAME
869
- approximation_function: NONE
870
- bias_format: SAME
871
- input_format: BFP[8|8]{64,-1}(SN)
872
- instance: Linear
873
- output_format: BFP[8|8]{64,-1}(SN)
874
- weight_format: BFP[8|8]{64,-1}(SN)
875
- weight_sparseness: DENSE
876
- model.decoder.layers.17.self_attn.softmax:
877
- approximation_function: NONE
878
- input_format: SAME
879
- instance: Softmax
880
- output_format: SAME
881
- model.decoder.layers.17.self_attn.v_proj:
882
- accum_format: SAME
883
- approximation_function: NONE
884
- bias_format: SAME
885
- input_format: BFP[8|8]{64,-1}(SN)
886
- instance: Linear
887
- output_format: BFP[8|8]{64,-1}(SN)
888
- weight_format: BFP[8|8]{64,-1}(SN)
889
- weight_sparseness: DENSE
890
- model.decoder.layers.17.self_attn_layer_norm:
891
- approximation_function: NONE
892
- bias_format: SAME
893
- input_format: SAME
894
- instance: LayerNorm
895
- output_format: SAME
896
- weight_format: SAME
897
- model.decoder.layers.18.activation_fn:
898
- approximation_function: NONE
899
- input_format: SAME
900
- instance: ReLU
901
- output_format: SAME
902
- model.decoder.layers.18.dropout:
903
- approximation_function: NONE
904
- input_format: SAME
905
- instance: Dropout
906
- output_format: SAME
907
- model.decoder.layers.18.fc1:
908
- accum_format: SAME
909
- approximation_function: NONE
910
- bias_format: SAME
911
- input_format: BFP[8|8]{64,-1}(SN)
912
- instance: Linear
913
- output_format: SAME
914
- weight_format: BFP[8|8]{64,-1}(SN)
915
- weight_sparseness: DENSE
916
- model.decoder.layers.18.fc2:
917
- accum_format: SAME
918
- approximation_function: NONE
919
- bias_format: SAME
920
- input_format: BFP[8|8]{64,-1}(SN)
921
- instance: Linear
922
- output_format: SAME
923
- weight_format: BFP[8|8]{64,-1}(SN)
924
- weight_sparseness: DENSE
925
- model.decoder.layers.18.final_layer_norm:
926
- approximation_function: NONE
927
- bias_format: SAME
928
- input_format: SAME
929
- instance: LayerNorm
930
- output_format: SAME
931
- weight_format: SAME
932
- model.decoder.layers.18.self_attn.dropout:
933
- approximation_function: NONE
934
- input_format: SAME
935
- instance: Dropout
936
- output_format: BFP[8|8]{64,-1}(SN)
937
- model.decoder.layers.18.self_attn.k_proj:
938
- accum_format: SAME
939
- approximation_function: NONE
940
- bias_format: SAME
941
- input_format: BFP[8|8]{64,-1}(SN)
942
- instance: Linear
943
- output_format: BFP[8|8]{64,-1}(SN)
944
- weight_format: BFP[8|8]{64,-1}(SN)
945
- weight_sparseness: DENSE
946
- model.decoder.layers.18.self_attn.out_proj:
947
- accum_format: SAME
948
- approximation_function: NONE
949
- bias_format: SAME
950
- input_format: BFP[8|8]{64,-1}(SN)
951
- instance: Linear
952
- output_format: SAME
953
- weight_format: BFP[8|8]{64,-1}(SN)
954
- weight_sparseness: DENSE
955
- model.decoder.layers.18.self_attn.q_proj:
956
- accum_format: SAME
957
- approximation_function: NONE
958
- bias_format: SAME
959
- input_format: BFP[8|8]{64,-1}(SN)
960
- instance: Linear
961
- output_format: BFP[8|8]{64,-1}(SN)
962
- weight_format: BFP[8|8]{64,-1}(SN)
963
- weight_sparseness: DENSE
964
- model.decoder.layers.18.self_attn.softmax:
965
- approximation_function: NONE
966
- input_format: SAME
967
- instance: Softmax
968
- output_format: SAME
969
- model.decoder.layers.18.self_attn.v_proj:
970
- accum_format: SAME
971
- approximation_function: NONE
972
- bias_format: SAME
973
- input_format: BFP[8|8]{64,-1}(SN)
974
- instance: Linear
975
- output_format: BFP[8|8]{64,-1}(SN)
976
- weight_format: BFP[8|8]{64,-1}(SN)
977
- weight_sparseness: DENSE
978
- model.decoder.layers.18.self_attn_layer_norm:
979
- approximation_function: NONE
980
- bias_format: SAME
981
- input_format: SAME
982
- instance: LayerNorm
983
- output_format: SAME
984
- weight_format: SAME
985
- model.decoder.layers.19.activation_fn:
986
- approximation_function: NONE
987
- input_format: SAME
988
- instance: ReLU
989
- output_format: SAME
990
- model.decoder.layers.19.dropout:
991
- approximation_function: NONE
992
- input_format: SAME
993
- instance: Dropout
994
- output_format: SAME
995
- model.decoder.layers.19.fc1:
996
- accum_format: SAME
997
- approximation_function: NONE
998
- bias_format: SAME
999
- input_format: BFP[8|8]{64,-1}(SN)
1000
- instance: Linear
1001
- output_format: SAME
1002
- weight_format: BFP[8|8]{64,-1}(SN)
1003
- weight_sparseness: DENSE
1004
- model.decoder.layers.19.fc2:
1005
- accum_format: SAME
1006
- approximation_function: NONE
1007
- bias_format: SAME
1008
- input_format: BFP[8|8]{64,-1}(SN)
1009
- instance: Linear
1010
- output_format: SAME
1011
- weight_format: BFP[8|8]{64,-1}(SN)
1012
- weight_sparseness: DENSE
1013
- model.decoder.layers.19.final_layer_norm:
1014
- approximation_function: NONE
1015
- bias_format: SAME
1016
- input_format: SAME
1017
- instance: LayerNorm
1018
- output_format: SAME
1019
- weight_format: SAME
1020
- model.decoder.layers.19.self_attn.dropout:
1021
- approximation_function: NONE
1022
- input_format: SAME
1023
- instance: Dropout
1024
- output_format: BFP[8|8]{64,-1}(SN)
1025
- model.decoder.layers.19.self_attn.k_proj:
1026
- accum_format: SAME
1027
- approximation_function: NONE
1028
- bias_format: SAME
1029
- input_format: BFP[8|8]{64,-1}(SN)
1030
- instance: Linear
1031
- output_format: BFP[8|8]{64,-1}(SN)
1032
- weight_format: BFP[8|8]{64,-1}(SN)
1033
- weight_sparseness: DENSE
1034
- model.decoder.layers.19.self_attn.out_proj:
1035
- accum_format: SAME
1036
- approximation_function: NONE
1037
- bias_format: SAME
1038
- input_format: BFP[8|8]{64,-1}(SN)
1039
- instance: Linear
1040
- output_format: SAME
1041
- weight_format: BFP[8|8]{64,-1}(SN)
1042
- weight_sparseness: DENSE
1043
- model.decoder.layers.19.self_attn.q_proj:
1044
- accum_format: SAME
1045
- approximation_function: NONE
1046
- bias_format: SAME
1047
- input_format: BFP[8|8]{64,-1}(SN)
1048
- instance: Linear
1049
- output_format: BFP[8|8]{64,-1}(SN)
1050
- weight_format: BFP[8|8]{64,-1}(SN)
1051
- weight_sparseness: DENSE
1052
- model.decoder.layers.19.self_attn.softmax:
1053
- approximation_function: NONE
1054
- input_format: SAME
1055
- instance: Softmax
1056
- output_format: SAME
1057
- model.decoder.layers.19.self_attn.v_proj:
1058
- accum_format: SAME
1059
- approximation_function: NONE
1060
- bias_format: SAME
1061
- input_format: BFP[8|8]{64,-1}(SN)
1062
- instance: Linear
1063
- output_format: BFP[8|8]{64,-1}(SN)
1064
- weight_format: BFP[8|8]{64,-1}(SN)
1065
- weight_sparseness: DENSE
1066
- model.decoder.layers.19.self_attn_layer_norm:
1067
- approximation_function: NONE
1068
- bias_format: SAME
1069
- input_format: SAME
1070
- instance: LayerNorm
1071
- output_format: SAME
1072
- weight_format: SAME
1073
- model.decoder.layers.2.activation_fn:
1074
- approximation_function: NONE
1075
- input_format: SAME
1076
- instance: ReLU
1077
- output_format: SAME
1078
- model.decoder.layers.2.dropout:
1079
- approximation_function: NONE
1080
- input_format: SAME
1081
- instance: Dropout
1082
- output_format: SAME
1083
- model.decoder.layers.2.fc1:
1084
- accum_format: SAME
1085
- approximation_function: NONE
1086
- bias_format: SAME
1087
- input_format: BFP[8|8]{64,-1}(SN)
1088
- instance: Linear
1089
- output_format: SAME
1090
- weight_format: BFP[8|8]{64,-1}(SN)
1091
- weight_sparseness: DENSE
1092
- model.decoder.layers.2.fc2:
1093
- accum_format: SAME
1094
- approximation_function: NONE
1095
- bias_format: SAME
1096
- input_format: BFP[8|8]{64,-1}(SN)
1097
- instance: Linear
1098
- output_format: SAME
1099
- weight_format: BFP[8|8]{64,-1}(SN)
1100
- weight_sparseness: DENSE
1101
- model.decoder.layers.2.final_layer_norm:
1102
- approximation_function: NONE
1103
- bias_format: SAME
1104
- input_format: SAME
1105
- instance: LayerNorm
1106
- output_format: SAME
1107
- weight_format: SAME
1108
- model.decoder.layers.2.self_attn.dropout:
1109
- approximation_function: NONE
1110
- input_format: SAME
1111
- instance: Dropout
1112
- output_format: BFP[8|8]{64,-1}(SN)
1113
- model.decoder.layers.2.self_attn.k_proj:
1114
- accum_format: SAME
1115
- approximation_function: NONE
1116
- bias_format: SAME
1117
- input_format: BFP[8|8]{64,-1}(SN)
1118
- instance: Linear
1119
- output_format: BFP[8|8]{64,-1}(SN)
1120
- weight_format: BFP[8|8]{64,-1}(SN)
1121
- weight_sparseness: DENSE
1122
- model.decoder.layers.2.self_attn.out_proj:
1123
- accum_format: SAME
1124
- approximation_function: NONE
1125
- bias_format: SAME
1126
- input_format: BFP[8|8]{64,-1}(SN)
1127
- instance: Linear
1128
- output_format: SAME
1129
- weight_format: BFP[8|8]{64,-1}(SN)
1130
- weight_sparseness: DENSE
1131
- model.decoder.layers.2.self_attn.q_proj:
1132
- accum_format: SAME
1133
- approximation_function: NONE
1134
- bias_format: SAME
1135
- input_format: BFP[8|8]{64,-1}(SN)
1136
- instance: Linear
1137
- output_format: BFP[8|8]{64,-1}(SN)
1138
- weight_format: BFP[8|8]{64,-1}(SN)
1139
- weight_sparseness: DENSE
1140
- model.decoder.layers.2.self_attn.softmax:
1141
- approximation_function: NONE
1142
- input_format: SAME
1143
- instance: Softmax
1144
- output_format: SAME
1145
- model.decoder.layers.2.self_attn.v_proj:
1146
- accum_format: SAME
1147
- approximation_function: NONE
1148
- bias_format: SAME
1149
- input_format: BFP[8|8]{64,-1}(SN)
1150
- instance: Linear
1151
- output_format: BFP[8|8]{64,-1}(SN)
1152
- weight_format: BFP[8|8]{64,-1}(SN)
1153
- weight_sparseness: DENSE
1154
- model.decoder.layers.2.self_attn_layer_norm:
1155
- approximation_function: NONE
1156
- bias_format: SAME
1157
- input_format: SAME
1158
- instance: LayerNorm
1159
- output_format: SAME
1160
- weight_format: SAME
1161
- model.decoder.layers.20.activation_fn:
1162
- approximation_function: NONE
1163
- input_format: SAME
1164
- instance: ReLU
1165
- output_format: SAME
1166
- model.decoder.layers.20.dropout:
1167
- approximation_function: NONE
1168
- input_format: SAME
1169
- instance: Dropout
1170
- output_format: SAME
1171
- model.decoder.layers.20.fc1:
1172
- accum_format: SAME
1173
- approximation_function: NONE
1174
- bias_format: SAME
1175
- input_format: BFP[8|8]{64,-1}(SN)
1176
- instance: Linear
1177
- output_format: SAME
1178
- weight_format: BFP[8|8]{64,-1}(SN)
1179
- weight_sparseness: DENSE
1180
- model.decoder.layers.20.fc2:
1181
- accum_format: SAME
1182
- approximation_function: NONE
1183
- bias_format: SAME
1184
- input_format: BFP[8|8]{64,-1}(SN)
1185
- instance: Linear
1186
- output_format: SAME
1187
- weight_format: BFP[8|8]{64,-1}(SN)
1188
- weight_sparseness: DENSE
1189
- model.decoder.layers.20.final_layer_norm:
1190
- approximation_function: NONE
1191
- bias_format: SAME
1192
- input_format: SAME
1193
- instance: LayerNorm
1194
- output_format: SAME
1195
- weight_format: SAME
1196
- model.decoder.layers.20.self_attn.dropout:
1197
- approximation_function: NONE
1198
- input_format: SAME
1199
- instance: Dropout
1200
- output_format: BFP[8|8]{64,-1}(SN)
1201
- model.decoder.layers.20.self_attn.k_proj:
1202
- accum_format: SAME
1203
- approximation_function: NONE
1204
- bias_format: SAME
1205
- input_format: BFP[8|8]{64,-1}(SN)
1206
- instance: Linear
1207
- output_format: BFP[8|8]{64,-1}(SN)
1208
- weight_format: BFP[8|8]{64,-1}(SN)
1209
- weight_sparseness: DENSE
1210
- model.decoder.layers.20.self_attn.out_proj:
1211
- accum_format: SAME
1212
- approximation_function: NONE
1213
- bias_format: SAME
1214
- input_format: BFP[8|8]{64,-1}(SN)
1215
- instance: Linear
1216
- output_format: SAME
1217
- weight_format: BFP[8|8]{64,-1}(SN)
1218
- weight_sparseness: DENSE
1219
- model.decoder.layers.20.self_attn.q_proj:
1220
- accum_format: SAME
1221
- approximation_function: NONE
1222
- bias_format: SAME
1223
- input_format: BFP[8|8]{64,-1}(SN)
1224
- instance: Linear
1225
- output_format: BFP[8|8]{64,-1}(SN)
1226
- weight_format: BFP[8|8]{64,-1}(SN)
1227
- weight_sparseness: DENSE
1228
- model.decoder.layers.20.self_attn.softmax:
1229
- approximation_function: NONE
1230
- input_format: SAME
1231
- instance: Softmax
1232
- output_format: SAME
1233
- model.decoder.layers.20.self_attn.v_proj:
1234
- accum_format: SAME
1235
- approximation_function: NONE
1236
- bias_format: SAME
1237
- input_format: BFP[8|8]{64,-1}(SN)
1238
- instance: Linear
1239
- output_format: BFP[8|8]{64,-1}(SN)
1240
- weight_format: BFP[8|8]{64,-1}(SN)
1241
- weight_sparseness: DENSE
1242
- model.decoder.layers.20.self_attn_layer_norm:
1243
- approximation_function: NONE
1244
- bias_format: SAME
1245
- input_format: SAME
1246
- instance: LayerNorm
1247
- output_format: SAME
1248
- weight_format: SAME
1249
- model.decoder.layers.21.activation_fn:
1250
- approximation_function: NONE
1251
- input_format: SAME
1252
- instance: ReLU
1253
- output_format: SAME
1254
- model.decoder.layers.21.dropout:
1255
- approximation_function: NONE
1256
- input_format: SAME
1257
- instance: Dropout
1258
- output_format: SAME
1259
- model.decoder.layers.21.fc1:
1260
- accum_format: SAME
1261
- approximation_function: NONE
1262
- bias_format: SAME
1263
- input_format: BFP[8|8]{64,-1}(SN)
1264
- instance: Linear
1265
- output_format: SAME
1266
- weight_format: BFP[8|8]{64,-1}(SN)
1267
- weight_sparseness: DENSE
1268
- model.decoder.layers.21.fc2:
1269
- accum_format: SAME
1270
- approximation_function: NONE
1271
- bias_format: SAME
1272
- input_format: BFP[8|8]{64,-1}(SN)
1273
- instance: Linear
1274
- output_format: SAME
1275
- weight_format: BFP[8|8]{64,-1}(SN)
1276
- weight_sparseness: DENSE
1277
- model.decoder.layers.21.final_layer_norm:
1278
- approximation_function: NONE
1279
- bias_format: SAME
1280
- input_format: SAME
1281
- instance: LayerNorm
1282
- output_format: SAME
1283
- weight_format: SAME
1284
- model.decoder.layers.21.self_attn.dropout:
1285
- approximation_function: NONE
1286
- input_format: SAME
1287
- instance: Dropout
1288
- output_format: BFP[8|8]{64,-1}(SN)
1289
- model.decoder.layers.21.self_attn.k_proj:
1290
- accum_format: SAME
1291
- approximation_function: NONE
1292
- bias_format: SAME
1293
- input_format: BFP[8|8]{64,-1}(SN)
1294
- instance: Linear
1295
- output_format: BFP[8|8]{64,-1}(SN)
1296
- weight_format: BFP[8|8]{64,-1}(SN)
1297
- weight_sparseness: DENSE
1298
- model.decoder.layers.21.self_attn.out_proj:
1299
- accum_format: SAME
1300
- approximation_function: NONE
1301
- bias_format: SAME
1302
- input_format: BFP[8|8]{64,-1}(SN)
1303
- instance: Linear
1304
- output_format: SAME
1305
- weight_format: BFP[8|8]{64,-1}(SN)
1306
- weight_sparseness: DENSE
1307
- model.decoder.layers.21.self_attn.q_proj:
1308
- accum_format: SAME
1309
- approximation_function: NONE
1310
- bias_format: SAME
1311
- input_format: BFP[8|8]{64,-1}(SN)
1312
- instance: Linear
1313
- output_format: BFP[8|8]{64,-1}(SN)
1314
- weight_format: BFP[8|8]{64,-1}(SN)
1315
- weight_sparseness: DENSE
1316
- model.decoder.layers.21.self_attn.softmax:
1317
- approximation_function: NONE
1318
- input_format: SAME
1319
- instance: Softmax
1320
- output_format: SAME
1321
- model.decoder.layers.21.self_attn.v_proj:
1322
- accum_format: SAME
1323
- approximation_function: NONE
1324
- bias_format: SAME
1325
- input_format: BFP[8|8]{64,-1}(SN)
1326
- instance: Linear
1327
- output_format: BFP[8|8]{64,-1}(SN)
1328
- weight_format: BFP[8|8]{64,-1}(SN)
1329
- weight_sparseness: DENSE
1330
- model.decoder.layers.21.self_attn_layer_norm:
1331
- approximation_function: NONE
1332
- bias_format: SAME
1333
- input_format: SAME
1334
- instance: LayerNorm
1335
- output_format: SAME
1336
- weight_format: SAME
1337
- model.decoder.layers.22.activation_fn:
1338
- approximation_function: NONE
1339
- input_format: SAME
1340
- instance: ReLU
1341
- output_format: SAME
1342
- model.decoder.layers.22.dropout:
1343
- approximation_function: NONE
1344
- input_format: SAME
1345
- instance: Dropout
1346
- output_format: SAME
1347
- model.decoder.layers.22.fc1:
1348
- accum_format: SAME
1349
- approximation_function: NONE
1350
- bias_format: SAME
1351
- input_format: BFP[8|8]{64,-1}(SN)
1352
- instance: Linear
1353
- output_format: SAME
1354
- weight_format: BFP[8|8]{64,-1}(SN)
1355
- weight_sparseness: DENSE
1356
- model.decoder.layers.22.fc2:
1357
- accum_format: SAME
1358
- approximation_function: NONE
1359
- bias_format: SAME
1360
- input_format: BFP[8|8]{64,-1}(SN)
1361
- instance: Linear
1362
- output_format: SAME
1363
- weight_format: BFP[8|8]{64,-1}(SN)
1364
- weight_sparseness: DENSE
1365
- model.decoder.layers.22.final_layer_norm:
1366
- approximation_function: NONE
1367
- bias_format: SAME
1368
- input_format: SAME
1369
- instance: LayerNorm
1370
- output_format: SAME
1371
- weight_format: SAME
1372
- model.decoder.layers.22.self_attn.dropout:
1373
- approximation_function: NONE
1374
- input_format: SAME
1375
- instance: Dropout
1376
- output_format: BFP[8|8]{64,-1}(SN)
1377
- model.decoder.layers.22.self_attn.k_proj:
1378
- accum_format: SAME
1379
- approximation_function: NONE
1380
- bias_format: SAME
1381
- input_format: BFP[8|8]{64,-1}(SN)
1382
- instance: Linear
1383
- output_format: BFP[8|8]{64,-1}(SN)
1384
- weight_format: BFP[8|8]{64,-1}(SN)
1385
- weight_sparseness: DENSE
1386
- model.decoder.layers.22.self_attn.out_proj:
1387
- accum_format: SAME
1388
- approximation_function: NONE
1389
- bias_format: SAME
1390
- input_format: BFP[8|8]{64,-1}(SN)
1391
- instance: Linear
1392
- output_format: SAME
1393
- weight_format: BFP[8|8]{64,-1}(SN)
1394
- weight_sparseness: DENSE
1395
- model.decoder.layers.22.self_attn.q_proj:
1396
- accum_format: SAME
1397
- approximation_function: NONE
1398
- bias_format: SAME
1399
- input_format: BFP[8|8]{64,-1}(SN)
1400
- instance: Linear
1401
- output_format: BFP[8|8]{64,-1}(SN)
1402
- weight_format: BFP[8|8]{64,-1}(SN)
1403
- weight_sparseness: DENSE
1404
- model.decoder.layers.22.self_attn.softmax:
1405
- approximation_function: NONE
1406
- input_format: SAME
1407
- instance: Softmax
1408
- output_format: SAME
1409
- model.decoder.layers.22.self_attn.v_proj:
1410
- accum_format: SAME
1411
- approximation_function: NONE
1412
- bias_format: SAME
1413
- input_format: BFP[8|8]{64,-1}(SN)
1414
- instance: Linear
1415
- output_format: BFP[8|8]{64,-1}(SN)
1416
- weight_format: BFP[8|8]{64,-1}(SN)
1417
- weight_sparseness: DENSE
1418
- model.decoder.layers.22.self_attn_layer_norm:
1419
- approximation_function: NONE
1420
- bias_format: SAME
1421
- input_format: SAME
1422
- instance: LayerNorm
1423
- output_format: SAME
1424
- weight_format: SAME
1425
- model.decoder.layers.23.activation_fn:
1426
- approximation_function: NONE
1427
- input_format: SAME
1428
- instance: ReLU
1429
- output_format: SAME
1430
- model.decoder.layers.23.dropout:
1431
- approximation_function: NONE
1432
- input_format: SAME
1433
- instance: Dropout
1434
- output_format: SAME
1435
- model.decoder.layers.23.fc1:
1436
- accum_format: SAME
1437
- approximation_function: NONE
1438
- bias_format: SAME
1439
- input_format: BFP[8|8]{64,-1}(SN)
1440
- instance: Linear
1441
- output_format: SAME
1442
- weight_format: BFP[8|8]{64,-1}(SN)
1443
- weight_sparseness: DENSE
1444
- model.decoder.layers.23.fc2:
1445
- accum_format: SAME
1446
- approximation_function: NONE
1447
- bias_format: SAME
1448
- input_format: BFP[8|8]{64,-1}(SN)
1449
- instance: Linear
1450
- output_format: SAME
1451
- weight_format: BFP[8|8]{64,-1}(SN)
1452
- weight_sparseness: DENSE
1453
- model.decoder.layers.23.final_layer_norm:
1454
- approximation_function: NONE
1455
- bias_format: SAME
1456
- input_format: SAME
1457
- instance: LayerNorm
1458
- output_format: SAME
1459
- weight_format: SAME
1460
- model.decoder.layers.23.self_attn.dropout:
1461
- approximation_function: NONE
1462
- input_format: SAME
1463
- instance: Dropout
1464
- output_format: BFP[8|8]{64,-1}(SN)
1465
- model.decoder.layers.23.self_attn.k_proj:
1466
- accum_format: SAME
1467
- approximation_function: NONE
1468
- bias_format: SAME
1469
- input_format: BFP[8|8]{64,-1}(SN)
1470
- instance: Linear
1471
- output_format: BFP[8|8]{64,-1}(SN)
1472
- weight_format: BFP[8|8]{64,-1}(SN)
1473
- weight_sparseness: DENSE
1474
- model.decoder.layers.23.self_attn.out_proj:
1475
- accum_format: SAME
1476
- approximation_function: NONE
1477
- bias_format: SAME
1478
- input_format: BFP[8|8]{64,-1}(SN)
1479
- instance: Linear
1480
- output_format: SAME
1481
- weight_format: BFP[8|8]{64,-1}(SN)
1482
- weight_sparseness: DENSE
1483
- model.decoder.layers.23.self_attn.q_proj:
1484
- accum_format: SAME
1485
- approximation_function: NONE
1486
- bias_format: SAME
1487
- input_format: BFP[8|8]{64,-1}(SN)
1488
- instance: Linear
1489
- output_format: BFP[8|8]{64,-1}(SN)
1490
- weight_format: BFP[8|8]{64,-1}(SN)
1491
- weight_sparseness: DENSE
1492
- model.decoder.layers.23.self_attn.softmax:
1493
- approximation_function: NONE
1494
- input_format: SAME
1495
- instance: Softmax
1496
- output_format: SAME
1497
- model.decoder.layers.23.self_attn.v_proj:
1498
- accum_format: SAME
1499
- approximation_function: NONE
1500
- bias_format: SAME
1501
- input_format: BFP[8|8]{64,-1}(SN)
1502
- instance: Linear
1503
- output_format: BFP[8|8]{64,-1}(SN)
1504
- weight_format: BFP[8|8]{64,-1}(SN)
1505
- weight_sparseness: DENSE
1506
- model.decoder.layers.23.self_attn_layer_norm:
1507
- approximation_function: NONE
1508
- bias_format: SAME
1509
- input_format: SAME
1510
- instance: LayerNorm
1511
- output_format: SAME
1512
- weight_format: SAME
1513
- model.decoder.layers.24.activation_fn:
1514
- approximation_function: NONE
1515
- input_format: SAME
1516
- instance: ReLU
1517
- output_format: SAME
1518
- model.decoder.layers.24.dropout:
1519
- approximation_function: NONE
1520
- input_format: SAME
1521
- instance: Dropout
1522
- output_format: SAME
1523
- model.decoder.layers.24.fc1:
1524
- accum_format: SAME
1525
- approximation_function: NONE
1526
- bias_format: SAME
1527
- input_format: BFP[8|8]{64,-1}(SN)
1528
- instance: Linear
1529
- output_format: SAME
1530
- weight_format: BFP[8|8]{64,-1}(SN)
1531
- weight_sparseness: DENSE
1532
- model.decoder.layers.24.fc2:
1533
- accum_format: SAME
1534
- approximation_function: NONE
1535
- bias_format: SAME
1536
- input_format: BFP[8|8]{64,-1}(SN)
1537
- instance: Linear
1538
- output_format: SAME
1539
- weight_format: BFP[8|8]{64,-1}(SN)
1540
- weight_sparseness: DENSE
1541
- model.decoder.layers.24.final_layer_norm:
1542
- approximation_function: NONE
1543
- bias_format: SAME
1544
- input_format: SAME
1545
- instance: LayerNorm
1546
- output_format: SAME
1547
- weight_format: SAME
1548
- model.decoder.layers.24.self_attn.dropout:
1549
- approximation_function: NONE
1550
- input_format: SAME
1551
- instance: Dropout
1552
- output_format: BFP[8|8]{64,-1}(SN)
1553
- model.decoder.layers.24.self_attn.k_proj:
1554
- accum_format: SAME
1555
- approximation_function: NONE
1556
- bias_format: SAME
1557
- input_format: BFP[8|8]{64,-1}(SN)
1558
- instance: Linear
1559
- output_format: BFP[8|8]{64,-1}(SN)
1560
- weight_format: BFP[8|8]{64,-1}(SN)
1561
- weight_sparseness: DENSE
1562
- model.decoder.layers.24.self_attn.out_proj:
1563
- accum_format: SAME
1564
- approximation_function: NONE
1565
- bias_format: SAME
1566
- input_format: BFP[8|8]{64,-1}(SN)
1567
- instance: Linear
1568
- output_format: SAME
1569
- weight_format: BFP[8|8]{64,-1}(SN)
1570
- weight_sparseness: DENSE
1571
- model.decoder.layers.24.self_attn.q_proj:
1572
- accum_format: SAME
1573
- approximation_function: NONE
1574
- bias_format: SAME
1575
- input_format: BFP[8|8]{64,-1}(SN)
1576
- instance: Linear
1577
- output_format: BFP[8|8]{64,-1}(SN)
1578
- weight_format: BFP[8|8]{64,-1}(SN)
1579
- weight_sparseness: DENSE
1580
- model.decoder.layers.24.self_attn.softmax:
1581
- approximation_function: NONE
1582
- input_format: SAME
1583
- instance: Softmax
1584
- output_format: SAME
1585
- model.decoder.layers.24.self_attn.v_proj:
1586
- accum_format: SAME
1587
- approximation_function: NONE
1588
- bias_format: SAME
1589
- input_format: BFP[8|8]{64,-1}(SN)
1590
- instance: Linear
1591
- output_format: BFP[8|8]{64,-1}(SN)
1592
- weight_format: BFP[8|8]{64,-1}(SN)
1593
- weight_sparseness: DENSE
1594
- model.decoder.layers.24.self_attn_layer_norm:
1595
- approximation_function: NONE
1596
- bias_format: SAME
1597
- input_format: SAME
1598
- instance: LayerNorm
1599
- output_format: SAME
1600
- weight_format: SAME
1601
- model.decoder.layers.25.activation_fn:
1602
- approximation_function: NONE
1603
- input_format: SAME
1604
- instance: ReLU
1605
- output_format: SAME
1606
- model.decoder.layers.25.dropout:
1607
- approximation_function: NONE
1608
- input_format: SAME
1609
- instance: Dropout
1610
- output_format: SAME
1611
- model.decoder.layers.25.fc1:
1612
- accum_format: SAME
1613
- approximation_function: NONE
1614
- bias_format: SAME
1615
- input_format: BFP[8|8]{64,-1}(SN)
1616
- instance: Linear
1617
- output_format: SAME
1618
- weight_format: BFP[8|8]{64,-1}(SN)
1619
- weight_sparseness: DENSE
1620
- model.decoder.layers.25.fc2:
1621
- accum_format: SAME
1622
- approximation_function: NONE
1623
- bias_format: SAME
1624
- input_format: BFP[8|8]{64,-1}(SN)
1625
- instance: Linear
1626
- output_format: SAME
1627
- weight_format: BFP[8|8]{64,-1}(SN)
1628
- weight_sparseness: DENSE
1629
- model.decoder.layers.25.final_layer_norm:
1630
- approximation_function: NONE
1631
- bias_format: SAME
1632
- input_format: SAME
1633
- instance: LayerNorm
1634
- output_format: SAME
1635
- weight_format: SAME
1636
- model.decoder.layers.25.self_attn.dropout:
1637
- approximation_function: NONE
1638
- input_format: SAME
1639
- instance: Dropout
1640
- output_format: BFP[8|8]{64,-1}(SN)
1641
- model.decoder.layers.25.self_attn.k_proj:
1642
- accum_format: SAME
1643
- approximation_function: NONE
1644
- bias_format: SAME
1645
- input_format: BFP[8|8]{64,-1}(SN)
1646
- instance: Linear
1647
- output_format: BFP[8|8]{64,-1}(SN)
1648
- weight_format: BFP[8|8]{64,-1}(SN)
1649
- weight_sparseness: DENSE
1650
- model.decoder.layers.25.self_attn.out_proj:
1651
- accum_format: SAME
1652
- approximation_function: NONE
1653
- bias_format: SAME
1654
- input_format: BFP[8|8]{64,-1}(SN)
1655
- instance: Linear
1656
- output_format: SAME
1657
- weight_format: BFP[8|8]{64,-1}(SN)
1658
- weight_sparseness: DENSE
1659
- model.decoder.layers.25.self_attn.q_proj:
1660
- accum_format: SAME
1661
- approximation_function: NONE
1662
- bias_format: SAME
1663
- input_format: BFP[8|8]{64,-1}(SN)
1664
- instance: Linear
1665
- output_format: BFP[8|8]{64,-1}(SN)
1666
- weight_format: BFP[8|8]{64,-1}(SN)
1667
- weight_sparseness: DENSE
1668
- model.decoder.layers.25.self_attn.softmax:
1669
- approximation_function: NONE
1670
- input_format: SAME
1671
- instance: Softmax
1672
- output_format: SAME
1673
- model.decoder.layers.25.self_attn.v_proj:
1674
- accum_format: SAME
1675
- approximation_function: NONE
1676
- bias_format: SAME
1677
- input_format: BFP[8|8]{64,-1}(SN)
1678
- instance: Linear
1679
- output_format: BFP[8|8]{64,-1}(SN)
1680
- weight_format: BFP[8|8]{64,-1}(SN)
1681
- weight_sparseness: DENSE
1682
- model.decoder.layers.25.self_attn_layer_norm:
1683
- approximation_function: NONE
1684
- bias_format: SAME
1685
- input_format: SAME
1686
- instance: LayerNorm
1687
- output_format: SAME
1688
- weight_format: SAME
1689
- model.decoder.layers.26.activation_fn:
1690
- approximation_function: NONE
1691
- input_format: SAME
1692
- instance: ReLU
1693
- output_format: SAME
1694
- model.decoder.layers.26.dropout:
1695
- approximation_function: NONE
1696
- input_format: SAME
1697
- instance: Dropout
1698
- output_format: SAME
1699
- model.decoder.layers.26.fc1:
1700
- accum_format: SAME
1701
- approximation_function: NONE
1702
- bias_format: SAME
1703
- input_format: BFP[8|8]{64,-1}(SN)
1704
- instance: Linear
1705
- output_format: SAME
1706
- weight_format: BFP[8|8]{64,-1}(SN)
1707
- weight_sparseness: DENSE
1708
- model.decoder.layers.26.fc2:
1709
- accum_format: SAME
1710
- approximation_function: NONE
1711
- bias_format: SAME
1712
- input_format: BFP[8|8]{64,-1}(SN)
1713
- instance: Linear
1714
- output_format: SAME
1715
- weight_format: BFP[8|8]{64,-1}(SN)
1716
- weight_sparseness: DENSE
1717
- model.decoder.layers.26.final_layer_norm:
1718
- approximation_function: NONE
1719
- bias_format: SAME
1720
- input_format: SAME
1721
- instance: LayerNorm
1722
- output_format: SAME
1723
- weight_format: SAME
1724
- model.decoder.layers.26.self_attn.dropout:
1725
- approximation_function: NONE
1726
- input_format: SAME
1727
- instance: Dropout
1728
- output_format: BFP[8|8]{64,-1}(SN)
1729
- model.decoder.layers.26.self_attn.k_proj:
1730
- accum_format: SAME
1731
- approximation_function: NONE
1732
- bias_format: SAME
1733
- input_format: BFP[8|8]{64,-1}(SN)
1734
- instance: Linear
1735
- output_format: BFP[8|8]{64,-1}(SN)
1736
- weight_format: BFP[8|8]{64,-1}(SN)
1737
- weight_sparseness: DENSE
1738
- model.decoder.layers.26.self_attn.out_proj:
1739
- accum_format: SAME
1740
- approximation_function: NONE
1741
- bias_format: SAME
1742
- input_format: BFP[8|8]{64,-1}(SN)
1743
- instance: Linear
1744
- output_format: SAME
1745
- weight_format: BFP[8|8]{64,-1}(SN)
1746
- weight_sparseness: DENSE
1747
- model.decoder.layers.26.self_attn.q_proj:
1748
- accum_format: SAME
1749
- approximation_function: NONE
1750
- bias_format: SAME
1751
- input_format: BFP[8|8]{64,-1}(SN)
1752
- instance: Linear
1753
- output_format: BFP[8|8]{64,-1}(SN)
1754
- weight_format: BFP[8|8]{64,-1}(SN)
1755
- weight_sparseness: DENSE
1756
- model.decoder.layers.26.self_attn.softmax:
1757
- approximation_function: NONE
1758
- input_format: SAME
1759
- instance: Softmax
1760
- output_format: SAME
1761
- model.decoder.layers.26.self_attn.v_proj:
1762
- accum_format: SAME
1763
- approximation_function: NONE
1764
- bias_format: SAME
1765
- input_format: BFP[8|8]{64,-1}(SN)
1766
- instance: Linear
1767
- output_format: BFP[8|8]{64,-1}(SN)
1768
- weight_format: BFP[8|8]{64,-1}(SN)
1769
- weight_sparseness: DENSE
1770
- model.decoder.layers.26.self_attn_layer_norm:
1771
- approximation_function: NONE
1772
- bias_format: SAME
1773
- input_format: SAME
1774
- instance: LayerNorm
1775
- output_format: SAME
1776
- weight_format: SAME
1777
- model.decoder.layers.27.activation_fn:
1778
- approximation_function: NONE
1779
- input_format: SAME
1780
- instance: ReLU
1781
- output_format: SAME
1782
- model.decoder.layers.27.dropout:
1783
- approximation_function: NONE
1784
- input_format: SAME
1785
- instance: Dropout
1786
- output_format: SAME
1787
- model.decoder.layers.27.fc1:
1788
- accum_format: SAME
1789
- approximation_function: NONE
1790
- bias_format: SAME
1791
- input_format: BFP[8|8]{64,-1}(SN)
1792
- instance: Linear
1793
- output_format: SAME
1794
- weight_format: BFP[8|8]{64,-1}(SN)
1795
- weight_sparseness: DENSE
1796
- model.decoder.layers.27.fc2:
1797
- accum_format: SAME
1798
- approximation_function: NONE
1799
- bias_format: SAME
1800
- input_format: BFP[8|8]{64,-1}(SN)
1801
- instance: Linear
1802
- output_format: SAME
1803
- weight_format: BFP[8|8]{64,-1}(SN)
1804
- weight_sparseness: DENSE
1805
- model.decoder.layers.27.final_layer_norm:
1806
- approximation_function: NONE
1807
- bias_format: SAME
1808
- input_format: SAME
1809
- instance: LayerNorm
1810
- output_format: SAME
1811
- weight_format: SAME
1812
- model.decoder.layers.27.self_attn.dropout:
1813
- approximation_function: NONE
1814
- input_format: SAME
1815
- instance: Dropout
1816
- output_format: BFP[8|8]{64,-1}(SN)
1817
- model.decoder.layers.27.self_attn.k_proj:
1818
- accum_format: SAME
1819
- approximation_function: NONE
1820
- bias_format: SAME
1821
- input_format: BFP[8|8]{64,-1}(SN)
1822
- instance: Linear
1823
- output_format: BFP[8|8]{64,-1}(SN)
1824
- weight_format: BFP[8|8]{64,-1}(SN)
1825
- weight_sparseness: DENSE
1826
- model.decoder.layers.27.self_attn.out_proj:
1827
- accum_format: SAME
1828
- approximation_function: NONE
1829
- bias_format: SAME
1830
- input_format: BFP[8|8]{64,-1}(SN)
1831
- instance: Linear
1832
- output_format: SAME
1833
- weight_format: BFP[8|8]{64,-1}(SN)
1834
- weight_sparseness: DENSE
1835
- model.decoder.layers.27.self_attn.q_proj:
1836
- accum_format: SAME
1837
- approximation_function: NONE
1838
- bias_format: SAME
1839
- input_format: BFP[8|8]{64,-1}(SN)
1840
- instance: Linear
1841
- output_format: BFP[8|8]{64,-1}(SN)
1842
- weight_format: BFP[8|8]{64,-1}(SN)
1843
- weight_sparseness: DENSE
1844
- model.decoder.layers.27.self_attn.softmax:
1845
- approximation_function: NONE
1846
- input_format: SAME
1847
- instance: Softmax
1848
- output_format: SAME
1849
- model.decoder.layers.27.self_attn.v_proj:
1850
- accum_format: SAME
1851
- approximation_function: NONE
1852
- bias_format: SAME
1853
- input_format: BFP[8|8]{64,-1}(SN)
1854
- instance: Linear
1855
- output_format: BFP[8|8]{64,-1}(SN)
1856
- weight_format: BFP[8|8]{64,-1}(SN)
1857
- weight_sparseness: DENSE
1858
- model.decoder.layers.27.self_attn_layer_norm:
1859
- approximation_function: NONE
1860
- bias_format: SAME
1861
- input_format: SAME
1862
- instance: LayerNorm
1863
- output_format: SAME
1864
- weight_format: SAME
1865
- model.decoder.layers.28.activation_fn:
1866
- approximation_function: NONE
1867
- input_format: SAME
1868
- instance: ReLU
1869
- output_format: SAME
1870
- model.decoder.layers.28.dropout:
1871
- approximation_function: NONE
1872
- input_format: SAME
1873
- instance: Dropout
1874
- output_format: SAME
1875
- model.decoder.layers.28.fc1:
1876
- accum_format: SAME
1877
- approximation_function: NONE
1878
- bias_format: SAME
1879
- input_format: BFP[8|8]{64,-1}(SN)
1880
- instance: Linear
1881
- output_format: SAME
1882
- weight_format: BFP[8|8]{64,-1}(SN)
1883
- weight_sparseness: DENSE
1884
- model.decoder.layers.28.fc2:
1885
- accum_format: SAME
1886
- approximation_function: NONE
1887
- bias_format: SAME
1888
- input_format: BFP[8|8]{64,-1}(SN)
1889
- instance: Linear
1890
- output_format: SAME
1891
- weight_format: BFP[8|8]{64,-1}(SN)
1892
- weight_sparseness: DENSE
1893
- model.decoder.layers.28.final_layer_norm:
1894
- approximation_function: NONE
1895
- bias_format: SAME
1896
- input_format: SAME
1897
- instance: LayerNorm
1898
- output_format: SAME
1899
- weight_format: SAME
1900
- model.decoder.layers.28.self_attn.dropout:
1901
- approximation_function: NONE
1902
- input_format: SAME
1903
- instance: Dropout
1904
- output_format: BFP[8|8]{64,-1}(SN)
1905
- model.decoder.layers.28.self_attn.k_proj:
1906
- accum_format: SAME
1907
- approximation_function: NONE
1908
- bias_format: SAME
1909
- input_format: BFP[8|8]{64,-1}(SN)
1910
- instance: Linear
1911
- output_format: BFP[8|8]{64,-1}(SN)
1912
- weight_format: BFP[8|8]{64,-1}(SN)
1913
- weight_sparseness: DENSE
1914
- model.decoder.layers.28.self_attn.out_proj:
1915
- accum_format: SAME
1916
- approximation_function: NONE
1917
- bias_format: SAME
1918
- input_format: BFP[8|8]{64,-1}(SN)
1919
- instance: Linear
1920
- output_format: SAME
1921
- weight_format: BFP[8|8]{64,-1}(SN)
1922
- weight_sparseness: DENSE
1923
- model.decoder.layers.28.self_attn.q_proj:
1924
- accum_format: SAME
1925
- approximation_function: NONE
1926
- bias_format: SAME
1927
- input_format: BFP[8|8]{64,-1}(SN)
1928
- instance: Linear
1929
- output_format: BFP[8|8]{64,-1}(SN)
1930
- weight_format: BFP[8|8]{64,-1}(SN)
1931
- weight_sparseness: DENSE
1932
- model.decoder.layers.28.self_attn.softmax:
1933
- approximation_function: NONE
1934
- input_format: SAME
1935
- instance: Softmax
1936
- output_format: SAME
1937
- model.decoder.layers.28.self_attn.v_proj:
1938
- accum_format: SAME
1939
- approximation_function: NONE
1940
- bias_format: SAME
1941
- input_format: BFP[8|8]{64,-1}(SN)
1942
- instance: Linear
1943
- output_format: BFP[8|8]{64,-1}(SN)
1944
- weight_format: BFP[8|8]{64,-1}(SN)
1945
- weight_sparseness: DENSE
1946
- model.decoder.layers.28.self_attn_layer_norm:
1947
- approximation_function: NONE
1948
- bias_format: SAME
1949
- input_format: SAME
1950
- instance: LayerNorm
1951
- output_format: SAME
1952
- weight_format: SAME
1953
- model.decoder.layers.29.activation_fn:
1954
- approximation_function: NONE
1955
- input_format: SAME
1956
- instance: ReLU
1957
- output_format: SAME
1958
- model.decoder.layers.29.dropout:
1959
- approximation_function: NONE
1960
- input_format: SAME
1961
- instance: Dropout
1962
- output_format: SAME
1963
- model.decoder.layers.29.fc1:
1964
- accum_format: SAME
1965
- approximation_function: NONE
1966
- bias_format: SAME
1967
- input_format: BFP[8|8]{64,-1}(SN)
1968
- instance: Linear
1969
- output_format: SAME
1970
- weight_format: BFP[8|8]{64,-1}(SN)
1971
- weight_sparseness: DENSE
1972
- model.decoder.layers.29.fc2:
1973
- accum_format: SAME
1974
- approximation_function: NONE
1975
- bias_format: SAME
1976
- input_format: BFP[8|8]{64,-1}(SN)
1977
- instance: Linear
1978
- output_format: SAME
1979
- weight_format: BFP[8|8]{64,-1}(SN)
1980
- weight_sparseness: DENSE
1981
- model.decoder.layers.29.final_layer_norm:
1982
- approximation_function: NONE
1983
- bias_format: SAME
1984
- input_format: SAME
1985
- instance: LayerNorm
1986
- output_format: SAME
1987
- weight_format: SAME
1988
- model.decoder.layers.29.self_attn.dropout:
1989
- approximation_function: NONE
1990
- input_format: SAME
1991
- instance: Dropout
1992
- output_format: BFP[8|8]{64,-1}(SN)
1993
- model.decoder.layers.29.self_attn.k_proj:
1994
- accum_format: SAME
1995
- approximation_function: NONE
1996
- bias_format: SAME
1997
- input_format: BFP[8|8]{64,-1}(SN)
1998
- instance: Linear
1999
- output_format: BFP[8|8]{64,-1}(SN)
2000
- weight_format: BFP[8|8]{64,-1}(SN)
2001
- weight_sparseness: DENSE
2002
- model.decoder.layers.29.self_attn.out_proj:
2003
- accum_format: SAME
2004
- approximation_function: NONE
2005
- bias_format: SAME
2006
- input_format: BFP[8|8]{64,-1}(SN)
2007
- instance: Linear
2008
- output_format: SAME
2009
- weight_format: BFP[8|8]{64,-1}(SN)
2010
- weight_sparseness: DENSE
2011
- model.decoder.layers.29.self_attn.q_proj:
2012
- accum_format: SAME
2013
- approximation_function: NONE
2014
- bias_format: SAME
2015
- input_format: BFP[8|8]{64,-1}(SN)
2016
- instance: Linear
2017
- output_format: BFP[8|8]{64,-1}(SN)
2018
- weight_format: BFP[8|8]{64,-1}(SN)
2019
- weight_sparseness: DENSE
2020
- model.decoder.layers.29.self_attn.softmax:
2021
- approximation_function: NONE
2022
- input_format: SAME
2023
- instance: Softmax
2024
- output_format: SAME
2025
- model.decoder.layers.29.self_attn.v_proj:
2026
- accum_format: SAME
2027
- approximation_function: NONE
2028
- bias_format: SAME
2029
- input_format: BFP[8|8]{64,-1}(SN)
2030
- instance: Linear
2031
- output_format: BFP[8|8]{64,-1}(SN)
2032
- weight_format: BFP[8|8]{64,-1}(SN)
2033
- weight_sparseness: DENSE
2034
- model.decoder.layers.29.self_attn_layer_norm:
2035
- approximation_function: NONE
2036
- bias_format: SAME
2037
- input_format: SAME
2038
- instance: LayerNorm
2039
- output_format: SAME
2040
- weight_format: SAME
2041
- model.decoder.layers.3.activation_fn:
2042
- approximation_function: NONE
2043
- input_format: SAME
2044
- instance: ReLU
2045
- output_format: SAME
2046
- model.decoder.layers.3.dropout:
2047
- approximation_function: NONE
2048
- input_format: SAME
2049
- instance: Dropout
2050
- output_format: SAME
2051
- model.decoder.layers.3.fc1:
2052
- accum_format: SAME
2053
- approximation_function: NONE
2054
- bias_format: SAME
2055
- input_format: BFP[8|8]{64,-1}(SN)
2056
- instance: Linear
2057
- output_format: SAME
2058
- weight_format: BFP[8|8]{64,-1}(SN)
2059
- weight_sparseness: DENSE
2060
- model.decoder.layers.3.fc2:
2061
- accum_format: SAME
2062
- approximation_function: NONE
2063
- bias_format: SAME
2064
- input_format: BFP[8|8]{64,-1}(SN)
2065
- instance: Linear
2066
- output_format: SAME
2067
- weight_format: BFP[8|8]{64,-1}(SN)
2068
- weight_sparseness: DENSE
2069
- model.decoder.layers.3.final_layer_norm:
2070
- approximation_function: NONE
2071
- bias_format: SAME
2072
- input_format: SAME
2073
- instance: LayerNorm
2074
- output_format: SAME
2075
- weight_format: SAME
2076
- model.decoder.layers.3.self_attn.dropout:
2077
- approximation_function: NONE
2078
- input_format: SAME
2079
- instance: Dropout
2080
- output_format: BFP[8|8]{64,-1}(SN)
2081
- model.decoder.layers.3.self_attn.k_proj:
2082
- accum_format: SAME
2083
- approximation_function: NONE
2084
- bias_format: SAME
2085
- input_format: BFP[8|8]{64,-1}(SN)
2086
- instance: Linear
2087
- output_format: BFP[8|8]{64,-1}(SN)
2088
- weight_format: BFP[8|8]{64,-1}(SN)
2089
- weight_sparseness: DENSE
2090
- model.decoder.layers.3.self_attn.out_proj:
2091
- accum_format: SAME
2092
- approximation_function: NONE
2093
- bias_format: SAME
2094
- input_format: BFP[8|8]{64,-1}(SN)
2095
- instance: Linear
2096
- output_format: SAME
2097
- weight_format: BFP[8|8]{64,-1}(SN)
2098
- weight_sparseness: DENSE
2099
- model.decoder.layers.3.self_attn.q_proj:
2100
- accum_format: SAME
2101
- approximation_function: NONE
2102
- bias_format: SAME
2103
- input_format: BFP[8|8]{64,-1}(SN)
2104
- instance: Linear
2105
- output_format: BFP[8|8]{64,-1}(SN)
2106
- weight_format: BFP[8|8]{64,-1}(SN)
2107
- weight_sparseness: DENSE
2108
- model.decoder.layers.3.self_attn.softmax:
2109
- approximation_function: NONE
2110
- input_format: SAME
2111
- instance: Softmax
2112
- output_format: SAME
2113
- model.decoder.layers.3.self_attn.v_proj:
2114
- accum_format: SAME
2115
- approximation_function: NONE
2116
- bias_format: SAME
2117
- input_format: BFP[8|8]{64,-1}(SN)
2118
- instance: Linear
2119
- output_format: BFP[8|8]{64,-1}(SN)
2120
- weight_format: BFP[8|8]{64,-1}(SN)
2121
- weight_sparseness: DENSE
2122
- model.decoder.layers.3.self_attn_layer_norm:
2123
- approximation_function: NONE
2124
- bias_format: SAME
2125
- input_format: SAME
2126
- instance: LayerNorm
2127
- output_format: SAME
2128
- weight_format: SAME
2129
- model.decoder.layers.30.activation_fn:
2130
- approximation_function: NONE
2131
- input_format: SAME
2132
- instance: ReLU
2133
- output_format: SAME
2134
- model.decoder.layers.30.dropout:
2135
- approximation_function: NONE
2136
- input_format: SAME
2137
- instance: Dropout
2138
- output_format: SAME
2139
- model.decoder.layers.30.fc1:
2140
- accum_format: SAME
2141
- approximation_function: NONE
2142
- bias_format: SAME
2143
- input_format: BFP[8|8]{64,-1}(SN)
2144
- instance: Linear
2145
- output_format: SAME
2146
- weight_format: BFP[8|8]{64,-1}(SN)
2147
- weight_sparseness: DENSE
2148
- model.decoder.layers.30.fc2:
2149
- accum_format: SAME
2150
- approximation_function: NONE
2151
- bias_format: SAME
2152
- input_format: BFP[8|8]{64,-1}(SN)
2153
- instance: Linear
2154
- output_format: SAME
2155
- weight_format: BFP[8|8]{64,-1}(SN)
2156
- weight_sparseness: DENSE
2157
- model.decoder.layers.30.final_layer_norm:
2158
- approximation_function: NONE
2159
- bias_format: SAME
2160
- input_format: SAME
2161
- instance: LayerNorm
2162
- output_format: SAME
2163
- weight_format: SAME
2164
- model.decoder.layers.30.self_attn.dropout:
2165
- approximation_function: NONE
2166
- input_format: SAME
2167
- instance: Dropout
2168
- output_format: BFP[8|8]{64,-1}(SN)
2169
- model.decoder.layers.30.self_attn.k_proj:
2170
- accum_format: SAME
2171
- approximation_function: NONE
2172
- bias_format: SAME
2173
- input_format: BFP[8|8]{64,-1}(SN)
2174
- instance: Linear
2175
- output_format: BFP[8|8]{64,-1}(SN)
2176
- weight_format: BFP[8|8]{64,-1}(SN)
2177
- weight_sparseness: DENSE
2178
- model.decoder.layers.30.self_attn.out_proj:
2179
- accum_format: SAME
2180
- approximation_function: NONE
2181
- bias_format: SAME
2182
- input_format: BFP[8|8]{64,-1}(SN)
2183
- instance: Linear
2184
- output_format: SAME
2185
- weight_format: BFP[8|8]{64,-1}(SN)
2186
- weight_sparseness: DENSE
2187
- model.decoder.layers.30.self_attn.q_proj:
2188
- accum_format: SAME
2189
- approximation_function: NONE
2190
- bias_format: SAME
2191
- input_format: BFP[8|8]{64,-1}(SN)
2192
- instance: Linear
2193
- output_format: BFP[8|8]{64,-1}(SN)
2194
- weight_format: BFP[8|8]{64,-1}(SN)
2195
- weight_sparseness: DENSE
2196
- model.decoder.layers.30.self_attn.softmax:
2197
- approximation_function: NONE
2198
- input_format: SAME
2199
- instance: Softmax
2200
- output_format: SAME
2201
- model.decoder.layers.30.self_attn.v_proj:
2202
- accum_format: SAME
2203
- approximation_function: NONE
2204
- bias_format: SAME
2205
- input_format: BFP[8|8]{64,-1}(SN)
2206
- instance: Linear
2207
- output_format: BFP[8|8]{64,-1}(SN)
2208
- weight_format: BFP[8|8]{64,-1}(SN)
2209
- weight_sparseness: DENSE
2210
- model.decoder.layers.30.self_attn_layer_norm:
2211
- approximation_function: NONE
2212
- bias_format: SAME
2213
- input_format: SAME
2214
- instance: LayerNorm
2215
- output_format: SAME
2216
- weight_format: SAME
2217
- model.decoder.layers.31.activation_fn:
2218
- approximation_function: NONE
2219
- input_format: SAME
2220
- instance: ReLU
2221
- output_format: SAME
2222
- model.decoder.layers.31.dropout:
2223
- approximation_function: NONE
2224
- input_format: SAME
2225
- instance: Dropout
2226
- output_format: SAME
2227
- model.decoder.layers.31.fc1:
2228
- accum_format: SAME
2229
- approximation_function: NONE
2230
- bias_format: SAME
2231
- input_format: BFP[8|8]{64,-1}(SN)
2232
- instance: Linear
2233
- output_format: SAME
2234
- weight_format: BFP[8|8]{64,-1}(SN)
2235
- weight_sparseness: DENSE
2236
- model.decoder.layers.31.fc2:
2237
- accum_format: SAME
2238
- approximation_function: NONE
2239
- bias_format: SAME
2240
- input_format: BFP[8|8]{64,-1}(SN)
2241
- instance: Linear
2242
- output_format: SAME
2243
- weight_format: BFP[8|8]{64,-1}(SN)
2244
- weight_sparseness: DENSE
2245
- model.decoder.layers.31.final_layer_norm:
2246
- approximation_function: NONE
2247
- bias_format: SAME
2248
- input_format: SAME
2249
- instance: LayerNorm
2250
- output_format: SAME
2251
- weight_format: SAME
2252
- model.decoder.layers.31.self_attn.dropout:
2253
- approximation_function: NONE
2254
- input_format: SAME
2255
- instance: Dropout
2256
- output_format: BFP[8|8]{64,-1}(SN)
2257
- model.decoder.layers.31.self_attn.k_proj:
2258
- accum_format: SAME
2259
- approximation_function: NONE
2260
- bias_format: SAME
2261
- input_format: BFP[8|8]{64,-1}(SN)
2262
- instance: Linear
2263
- output_format: BFP[8|8]{64,-1}(SN)
2264
- weight_format: BFP[8|8]{64,-1}(SN)
2265
- weight_sparseness: DENSE
2266
- model.decoder.layers.31.self_attn.out_proj:
2267
- accum_format: SAME
2268
- approximation_function: NONE
2269
- bias_format: SAME
2270
- input_format: BFP[8|8]{64,-1}(SN)
2271
- instance: Linear
2272
- output_format: SAME
2273
- weight_format: BFP[8|8]{64,-1}(SN)
2274
- weight_sparseness: DENSE
2275
- model.decoder.layers.31.self_attn.q_proj:
2276
- accum_format: SAME
2277
- approximation_function: NONE
2278
- bias_format: SAME
2279
- input_format: BFP[8|8]{64,-1}(SN)
2280
- instance: Linear
2281
- output_format: BFP[8|8]{64,-1}(SN)
2282
- weight_format: BFP[8|8]{64,-1}(SN)
2283
- weight_sparseness: DENSE
2284
- model.decoder.layers.31.self_attn.softmax:
2285
- approximation_function: NONE
2286
- input_format: SAME
2287
- instance: Softmax
2288
- output_format: SAME
2289
- model.decoder.layers.31.self_attn.v_proj:
2290
- accum_format: SAME
2291
- approximation_function: NONE
2292
- bias_format: SAME
2293
- input_format: BFP[8|8]{64,-1}(SN)
2294
- instance: Linear
2295
- output_format: BFP[8|8]{64,-1}(SN)
2296
- weight_format: BFP[8|8]{64,-1}(SN)
2297
- weight_sparseness: DENSE
2298
- model.decoder.layers.31.self_attn_layer_norm:
2299
- approximation_function: NONE
2300
- bias_format: SAME
2301
- input_format: SAME
2302
- instance: LayerNorm
2303
- output_format: SAME
2304
- weight_format: SAME
2305
- model.decoder.layers.4.activation_fn:
2306
- approximation_function: NONE
2307
- input_format: SAME
2308
- instance: ReLU
2309
- output_format: SAME
2310
- model.decoder.layers.4.dropout:
2311
- approximation_function: NONE
2312
- input_format: SAME
2313
- instance: Dropout
2314
- output_format: SAME
2315
- model.decoder.layers.4.fc1:
2316
- accum_format: SAME
2317
- approximation_function: NONE
2318
- bias_format: SAME
2319
- input_format: BFP[8|8]{64,-1}(SN)
2320
- instance: Linear
2321
- output_format: SAME
2322
- weight_format: BFP[8|8]{64,-1}(SN)
2323
- weight_sparseness: DENSE
2324
- model.decoder.layers.4.fc2:
2325
- accum_format: SAME
2326
- approximation_function: NONE
2327
- bias_format: SAME
2328
- input_format: BFP[8|8]{64,-1}(SN)
2329
- instance: Linear
2330
- output_format: SAME
2331
- weight_format: BFP[8|8]{64,-1}(SN)
2332
- weight_sparseness: DENSE
2333
- model.decoder.layers.4.final_layer_norm:
2334
- approximation_function: NONE
2335
- bias_format: SAME
2336
- input_format: SAME
2337
- instance: LayerNorm
2338
- output_format: SAME
2339
- weight_format: SAME
2340
- model.decoder.layers.4.self_attn.dropout:
2341
- approximation_function: NONE
2342
- input_format: SAME
2343
- instance: Dropout
2344
- output_format: BFP[8|8]{64,-1}(SN)
2345
- model.decoder.layers.4.self_attn.k_proj:
2346
- accum_format: SAME
2347
- approximation_function: NONE
2348
- bias_format: SAME
2349
- input_format: BFP[8|8]{64,-1}(SN)
2350
- instance: Linear
2351
- output_format: BFP[8|8]{64,-1}(SN)
2352
- weight_format: BFP[8|8]{64,-1}(SN)
2353
- weight_sparseness: DENSE
2354
- model.decoder.layers.4.self_attn.out_proj:
2355
- accum_format: SAME
2356
- approximation_function: NONE
2357
- bias_format: SAME
2358
- input_format: BFP[8|8]{64,-1}(SN)
2359
- instance: Linear
2360
- output_format: SAME
2361
- weight_format: BFP[8|8]{64,-1}(SN)
2362
- weight_sparseness: DENSE
2363
- model.decoder.layers.4.self_attn.q_proj:
2364
- accum_format: SAME
2365
- approximation_function: NONE
2366
- bias_format: SAME
2367
- input_format: BFP[8|8]{64,-1}(SN)
2368
- instance: Linear
2369
- output_format: BFP[8|8]{64,-1}(SN)
2370
- weight_format: BFP[8|8]{64,-1}(SN)
2371
- weight_sparseness: DENSE
2372
- model.decoder.layers.4.self_attn.softmax:
2373
- approximation_function: NONE
2374
- input_format: SAME
2375
- instance: Softmax
2376
- output_format: SAME
2377
- model.decoder.layers.4.self_attn.v_proj:
2378
- accum_format: SAME
2379
- approximation_function: NONE
2380
- bias_format: SAME
2381
- input_format: BFP[8|8]{64,-1}(SN)
2382
- instance: Linear
2383
- output_format: BFP[8|8]{64,-1}(SN)
2384
- weight_format: BFP[8|8]{64,-1}(SN)
2385
- weight_sparseness: DENSE
2386
- model.decoder.layers.4.self_attn_layer_norm:
2387
- approximation_function: NONE
2388
- bias_format: SAME
2389
- input_format: SAME
2390
- instance: LayerNorm
2391
- output_format: SAME
2392
- weight_format: SAME
2393
- model.decoder.layers.5.activation_fn:
2394
- approximation_function: NONE
2395
- input_format: SAME
2396
- instance: ReLU
2397
- output_format: SAME
2398
- model.decoder.layers.5.dropout:
2399
- approximation_function: NONE
2400
- input_format: SAME
2401
- instance: Dropout
2402
- output_format: SAME
2403
- model.decoder.layers.5.fc1:
2404
- accum_format: SAME
2405
- approximation_function: NONE
2406
- bias_format: SAME
2407
- input_format: BFP[8|8]{64,-1}(SN)
2408
- instance: Linear
2409
- output_format: SAME
2410
- weight_format: BFP[8|8]{64,-1}(SN)
2411
- weight_sparseness: DENSE
2412
- model.decoder.layers.5.fc2:
2413
- accum_format: SAME
2414
- approximation_function: NONE
2415
- bias_format: SAME
2416
- input_format: BFP[8|8]{64,-1}(SN)
2417
- instance: Linear
2418
- output_format: SAME
2419
- weight_format: BFP[8|8]{64,-1}(SN)
2420
- weight_sparseness: DENSE
2421
- model.decoder.layers.5.final_layer_norm:
2422
- approximation_function: NONE
2423
- bias_format: SAME
2424
- input_format: SAME
2425
- instance: LayerNorm
2426
- output_format: SAME
2427
- weight_format: SAME
2428
- model.decoder.layers.5.self_attn.dropout:
2429
- approximation_function: NONE
2430
- input_format: SAME
2431
- instance: Dropout
2432
- output_format: BFP[8|8]{64,-1}(SN)
2433
- model.decoder.layers.5.self_attn.k_proj:
2434
- accum_format: SAME
2435
- approximation_function: NONE
2436
- bias_format: SAME
2437
- input_format: BFP[8|8]{64,-1}(SN)
2438
- instance: Linear
2439
- output_format: BFP[8|8]{64,-1}(SN)
2440
- weight_format: BFP[8|8]{64,-1}(SN)
2441
- weight_sparseness: DENSE
2442
- model.decoder.layers.5.self_attn.out_proj:
2443
- accum_format: SAME
2444
- approximation_function: NONE
2445
- bias_format: SAME
2446
- input_format: BFP[8|8]{64,-1}(SN)
2447
- instance: Linear
2448
- output_format: SAME
2449
- weight_format: BFP[8|8]{64,-1}(SN)
2450
- weight_sparseness: DENSE
2451
- model.decoder.layers.5.self_attn.q_proj:
2452
- accum_format: SAME
2453
- approximation_function: NONE
2454
- bias_format: SAME
2455
- input_format: BFP[8|8]{64,-1}(SN)
2456
- instance: Linear
2457
- output_format: BFP[8|8]{64,-1}(SN)
2458
- weight_format: BFP[8|8]{64,-1}(SN)
2459
- weight_sparseness: DENSE
2460
- model.decoder.layers.5.self_attn.softmax:
2461
- approximation_function: NONE
2462
- input_format: SAME
2463
- instance: Softmax
2464
- output_format: SAME
2465
- model.decoder.layers.5.self_attn.v_proj:
2466
- accum_format: SAME
2467
- approximation_function: NONE
2468
- bias_format: SAME
2469
- input_format: BFP[8|8]{64,-1}(SN)
2470
- instance: Linear
2471
- output_format: BFP[8|8]{64,-1}(SN)
2472
- weight_format: BFP[8|8]{64,-1}(SN)
2473
- weight_sparseness: DENSE
2474
- model.decoder.layers.5.self_attn_layer_norm:
2475
- approximation_function: NONE
2476
- bias_format: SAME
2477
- input_format: SAME
2478
- instance: LayerNorm
2479
- output_format: SAME
2480
- weight_format: SAME
2481
- model.decoder.layers.6.activation_fn:
2482
- approximation_function: NONE
2483
- input_format: SAME
2484
- instance: ReLU
2485
- output_format: SAME
2486
- model.decoder.layers.6.dropout:
2487
- approximation_function: NONE
2488
- input_format: SAME
2489
- instance: Dropout
2490
- output_format: SAME
2491
- model.decoder.layers.6.fc1:
2492
- accum_format: SAME
2493
- approximation_function: NONE
2494
- bias_format: SAME
2495
- input_format: BFP[8|8]{64,-1}(SN)
2496
- instance: Linear
2497
- output_format: SAME
2498
- weight_format: BFP[8|8]{64,-1}(SN)
2499
- weight_sparseness: DENSE
2500
- model.decoder.layers.6.fc2:
2501
- accum_format: SAME
2502
- approximation_function: NONE
2503
- bias_format: SAME
2504
- input_format: BFP[8|8]{64,-1}(SN)
2505
- instance: Linear
2506
- output_format: SAME
2507
- weight_format: BFP[8|8]{64,-1}(SN)
2508
- weight_sparseness: DENSE
2509
- model.decoder.layers.6.final_layer_norm:
2510
- approximation_function: NONE
2511
- bias_format: SAME
2512
- input_format: SAME
2513
- instance: LayerNorm
2514
- output_format: SAME
2515
- weight_format: SAME
2516
- model.decoder.layers.6.self_attn.dropout:
2517
- approximation_function: NONE
2518
- input_format: SAME
2519
- instance: Dropout
2520
- output_format: BFP[8|8]{64,-1}(SN)
2521
- model.decoder.layers.6.self_attn.k_proj:
2522
- accum_format: SAME
2523
- approximation_function: NONE
2524
- bias_format: SAME
2525
- input_format: BFP[8|8]{64,-1}(SN)
2526
- instance: Linear
2527
- output_format: BFP[8|8]{64,-1}(SN)
2528
- weight_format: BFP[8|8]{64,-1}(SN)
2529
- weight_sparseness: DENSE
2530
- model.decoder.layers.6.self_attn.out_proj:
2531
- accum_format: SAME
2532
- approximation_function: NONE
2533
- bias_format: SAME
2534
- input_format: BFP[8|8]{64,-1}(SN)
2535
- instance: Linear
2536
- output_format: SAME
2537
- weight_format: BFP[8|8]{64,-1}(SN)
2538
- weight_sparseness: DENSE
2539
- model.decoder.layers.6.self_attn.q_proj:
2540
- accum_format: SAME
2541
- approximation_function: NONE
2542
- bias_format: SAME
2543
- input_format: BFP[8|8]{64,-1}(SN)
2544
- instance: Linear
2545
- output_format: BFP[8|8]{64,-1}(SN)
2546
- weight_format: BFP[8|8]{64,-1}(SN)
2547
- weight_sparseness: DENSE
2548
- model.decoder.layers.6.self_attn.softmax:
2549
- approximation_function: NONE
2550
- input_format: SAME
2551
- instance: Softmax
2552
- output_format: SAME
2553
- model.decoder.layers.6.self_attn.v_proj:
2554
- accum_format: SAME
2555
- approximation_function: NONE
2556
- bias_format: SAME
2557
- input_format: BFP[8|8]{64,-1}(SN)
2558
- instance: Linear
2559
- output_format: BFP[8|8]{64,-1}(SN)
2560
- weight_format: BFP[8|8]{64,-1}(SN)
2561
- weight_sparseness: DENSE
2562
- model.decoder.layers.6.self_attn_layer_norm:
2563
- approximation_function: NONE
2564
- bias_format: SAME
2565
- input_format: SAME
2566
- instance: LayerNorm
2567
- output_format: SAME
2568
- weight_format: SAME
2569
- model.decoder.layers.7.activation_fn:
2570
- approximation_function: NONE
2571
- input_format: SAME
2572
- instance: ReLU
2573
- output_format: SAME
2574
- model.decoder.layers.7.dropout:
2575
- approximation_function: NONE
2576
- input_format: SAME
2577
- instance: Dropout
2578
- output_format: SAME
2579
- model.decoder.layers.7.fc1:
2580
- accum_format: SAME
2581
- approximation_function: NONE
2582
- bias_format: SAME
2583
- input_format: BFP[8|8]{64,-1}(SN)
2584
- instance: Linear
2585
- output_format: SAME
2586
- weight_format: BFP[8|8]{64,-1}(SN)
2587
- weight_sparseness: DENSE
2588
- model.decoder.layers.7.fc2:
2589
- accum_format: SAME
2590
- approximation_function: NONE
2591
- bias_format: SAME
2592
- input_format: BFP[8|8]{64,-1}(SN)
2593
- instance: Linear
2594
- output_format: SAME
2595
- weight_format: BFP[8|8]{64,-1}(SN)
2596
- weight_sparseness: DENSE
2597
- model.decoder.layers.7.final_layer_norm:
2598
- approximation_function: NONE
2599
- bias_format: SAME
2600
- input_format: SAME
2601
- instance: LayerNorm
2602
- output_format: SAME
2603
- weight_format: SAME
2604
- model.decoder.layers.7.self_attn.dropout:
2605
- approximation_function: NONE
2606
- input_format: SAME
2607
- instance: Dropout
2608
- output_format: BFP[8|8]{64,-1}(SN)
2609
- model.decoder.layers.7.self_attn.k_proj:
2610
- accum_format: SAME
2611
- approximation_function: NONE
2612
- bias_format: SAME
2613
- input_format: BFP[8|8]{64,-1}(SN)
2614
- instance: Linear
2615
- output_format: BFP[8|8]{64,-1}(SN)
2616
- weight_format: BFP[8|8]{64,-1}(SN)
2617
- weight_sparseness: DENSE
2618
- model.decoder.layers.7.self_attn.out_proj:
2619
- accum_format: SAME
2620
- approximation_function: NONE
2621
- bias_format: SAME
2622
- input_format: BFP[8|8]{64,-1}(SN)
2623
- instance: Linear
2624
- output_format: SAME
2625
- weight_format: BFP[8|8]{64,-1}(SN)
2626
- weight_sparseness: DENSE
2627
- model.decoder.layers.7.self_attn.q_proj:
2628
- accum_format: SAME
2629
- approximation_function: NONE
2630
- bias_format: SAME
2631
- input_format: BFP[8|8]{64,-1}(SN)
2632
- instance: Linear
2633
- output_format: BFP[8|8]{64,-1}(SN)
2634
- weight_format: BFP[8|8]{64,-1}(SN)
2635
- weight_sparseness: DENSE
2636
- model.decoder.layers.7.self_attn.softmax:
2637
- approximation_function: NONE
2638
- input_format: SAME
2639
- instance: Softmax
2640
- output_format: SAME
2641
- model.decoder.layers.7.self_attn.v_proj:
2642
- accum_format: SAME
2643
- approximation_function: NONE
2644
- bias_format: SAME
2645
- input_format: BFP[8|8]{64,-1}(SN)
2646
- instance: Linear
2647
- output_format: BFP[8|8]{64,-1}(SN)
2648
- weight_format: BFP[8|8]{64,-1}(SN)
2649
- weight_sparseness: DENSE
2650
- model.decoder.layers.7.self_attn_layer_norm:
2651
- approximation_function: NONE
2652
- bias_format: SAME
2653
- input_format: SAME
2654
- instance: LayerNorm
2655
- output_format: SAME
2656
- weight_format: SAME
2657
- model.decoder.layers.8.activation_fn:
2658
- approximation_function: NONE
2659
- input_format: SAME
2660
- instance: ReLU
2661
- output_format: SAME
2662
- model.decoder.layers.8.dropout:
2663
- approximation_function: NONE
2664
- input_format: SAME
2665
- instance: Dropout
2666
- output_format: SAME
2667
- model.decoder.layers.8.fc1:
2668
- accum_format: SAME
2669
- approximation_function: NONE
2670
- bias_format: SAME
2671
- input_format: BFP[8|8]{64,-1}(SN)
2672
- instance: Linear
2673
- output_format: SAME
2674
- weight_format: BFP[8|8]{64,-1}(SN)
2675
- weight_sparseness: DENSE
2676
- model.decoder.layers.8.fc2:
2677
- accum_format: SAME
2678
- approximation_function: NONE
2679
- bias_format: SAME
2680
- input_format: BFP[8|8]{64,-1}(SN)
2681
- instance: Linear
2682
- output_format: SAME
2683
- weight_format: BFP[8|8]{64,-1}(SN)
2684
- weight_sparseness: DENSE
2685
- model.decoder.layers.8.final_layer_norm:
2686
- approximation_function: NONE
2687
- bias_format: SAME
2688
- input_format: SAME
2689
- instance: LayerNorm
2690
- output_format: SAME
2691
- weight_format: SAME
2692
- model.decoder.layers.8.self_attn.dropout:
2693
- approximation_function: NONE
2694
- input_format: SAME
2695
- instance: Dropout
2696
- output_format: BFP[8|8]{64,-1}(SN)
2697
- model.decoder.layers.8.self_attn.k_proj:
2698
- accum_format: SAME
2699
- approximation_function: NONE
2700
- bias_format: SAME
2701
- input_format: BFP[8|8]{64,-1}(SN)
2702
- instance: Linear
2703
- output_format: BFP[8|8]{64,-1}(SN)
2704
- weight_format: BFP[8|8]{64,-1}(SN)
2705
- weight_sparseness: DENSE
2706
- model.decoder.layers.8.self_attn.out_proj:
2707
- accum_format: SAME
2708
- approximation_function: NONE
2709
- bias_format: SAME
2710
- input_format: BFP[8|8]{64,-1}(SN)
2711
- instance: Linear
2712
- output_format: SAME
2713
- weight_format: BFP[8|8]{64,-1}(SN)
2714
- weight_sparseness: DENSE
2715
- model.decoder.layers.8.self_attn.q_proj:
2716
- accum_format: SAME
2717
- approximation_function: NONE
2718
- bias_format: SAME
2719
- input_format: BFP[8|8]{64,-1}(SN)
2720
- instance: Linear
2721
- output_format: BFP[8|8]{64,-1}(SN)
2722
- weight_format: BFP[8|8]{64,-1}(SN)
2723
- weight_sparseness: DENSE
2724
- model.decoder.layers.8.self_attn.softmax:
2725
- approximation_function: NONE
2726
- input_format: SAME
2727
- instance: Softmax
2728
- output_format: SAME
2729
- model.decoder.layers.8.self_attn.v_proj:
2730
- accum_format: SAME
2731
- approximation_function: NONE
2732
- bias_format: SAME
2733
- input_format: BFP[8|8]{64,-1}(SN)
2734
- instance: Linear
2735
- output_format: BFP[8|8]{64,-1}(SN)
2736
- weight_format: BFP[8|8]{64,-1}(SN)
2737
- weight_sparseness: DENSE
2738
- model.decoder.layers.8.self_attn_layer_norm:
2739
- approximation_function: NONE
2740
- bias_format: SAME
2741
- input_format: SAME
2742
- instance: LayerNorm
2743
- output_format: SAME
2744
- weight_format: SAME
2745
- model.decoder.layers.9.activation_fn:
2746
- approximation_function: NONE
2747
- input_format: SAME
2748
- instance: ReLU
2749
- output_format: SAME
2750
- model.decoder.layers.9.dropout:
2751
- approximation_function: NONE
2752
- input_format: SAME
2753
- instance: Dropout
2754
- output_format: SAME
2755
- model.decoder.layers.9.fc1:
2756
- accum_format: SAME
2757
- approximation_function: NONE
2758
- bias_format: SAME
2759
- input_format: BFP[8|8]{64,-1}(SN)
2760
- instance: Linear
2761
- output_format: SAME
2762
- weight_format: BFP[8|8]{64,-1}(SN)
2763
- weight_sparseness: DENSE
2764
- model.decoder.layers.9.fc2:
2765
- accum_format: SAME
2766
- approximation_function: NONE
2767
- bias_format: SAME
2768
- input_format: BFP[8|8]{64,-1}(SN)
2769
- instance: Linear
2770
- output_format: SAME
2771
- weight_format: BFP[8|8]{64,-1}(SN)
2772
- weight_sparseness: DENSE
2773
- model.decoder.layers.9.final_layer_norm:
2774
- approximation_function: NONE
2775
- bias_format: SAME
2776
- input_format: SAME
2777
- instance: LayerNorm
2778
- output_format: SAME
2779
- weight_format: SAME
2780
- model.decoder.layers.9.self_attn.dropout:
2781
- approximation_function: NONE
2782
- input_format: SAME
2783
- instance: Dropout
2784
- output_format: BFP[8|8]{64,-1}(SN)
2785
- model.decoder.layers.9.self_attn.k_proj:
2786
- accum_format: SAME
2787
- approximation_function: NONE
2788
- bias_format: SAME
2789
- input_format: BFP[8|8]{64,-1}(SN)
2790
- instance: Linear
2791
- output_format: BFP[8|8]{64,-1}(SN)
2792
- weight_format: BFP[8|8]{64,-1}(SN)
2793
- weight_sparseness: DENSE
2794
- model.decoder.layers.9.self_attn.out_proj:
2795
- accum_format: SAME
2796
- approximation_function: NONE
2797
- bias_format: SAME
2798
- input_format: BFP[8|8]{64,-1}(SN)
2799
- instance: Linear
2800
- output_format: SAME
2801
- weight_format: BFP[8|8]{64,-1}(SN)
2802
- weight_sparseness: DENSE
2803
- model.decoder.layers.9.self_attn.q_proj:
2804
- accum_format: SAME
2805
- approximation_function: NONE
2806
- bias_format: SAME
2807
- input_format: BFP[8|8]{64,-1}(SN)
2808
- instance: Linear
2809
- output_format: BFP[8|8]{64,-1}(SN)
2810
- weight_format: BFP[8|8]{64,-1}(SN)
2811
- weight_sparseness: DENSE
2812
- model.decoder.layers.9.self_attn.softmax:
2813
- approximation_function: NONE
2814
- input_format: SAME
2815
- instance: Softmax
2816
- output_format: SAME
2817
- model.decoder.layers.9.self_attn.v_proj:
2818
- accum_format: SAME
2819
- approximation_function: NONE
2820
- bias_format: SAME
2821
- input_format: BFP[8|8]{64,-1}(SN)
2822
- instance: Linear
2823
- output_format: BFP[8|8]{64,-1}(SN)
2824
- weight_format: BFP[8|8]{64,-1}(SN)
2825
- weight_sparseness: DENSE
2826
- model.decoder.layers.9.self_attn_layer_norm:
2827
- approximation_function: NONE
2828
- bias_format: SAME
2829
- input_format: SAME
2830
- instance: LayerNorm
2831
- output_format: SAME
2832
- weight_format: SAME