wanzin committed on
Commit 63fa643 · 1 Parent(s): b8e54e6

update BASIC config
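
The change is uniform: all 97 modified lines switch an approximation_function from a float16 approximation (LAYERNORM(fallback,4,float16) on LayerNorm modules, SOFTMAX(base2,float16) on Softmax modules) to NONE, which presumably disables the approximation so these ops are computed exactly. One representative block after the change (layer 0's softmax, shown nested as a sketch of the pattern; the same edit applies to all 32 decoder layers plus the top-level final_layer_norm):

model.decoder.layers.0.self_attn.softmax:
  approximation_function: NONE  # was SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME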

Files changed (1)
  1. configs/BASIC.yaml +97 -97
configs/BASIC.yaml CHANGED
@@ -8,7 +8,7 @@ model:
  weight_format: SAME
  weight_sparseness: DENSE
  model.decoder.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -43,7 +43,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.0.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -82,7 +82,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.0.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -96,7 +96,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.0.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -131,7 +131,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.1.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -170,7 +170,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.1.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -184,7 +184,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.1.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -219,7 +219,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.10.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -258,7 +258,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.10.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -272,7 +272,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.10.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -307,7 +307,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.11.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -346,7 +346,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.11.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -360,7 +360,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.11.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -395,7 +395,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.12.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -434,7 +434,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.12.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -448,7 +448,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.12.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -483,7 +483,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.13.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -522,7 +522,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.13.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -536,7 +536,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.13.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -571,7 +571,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.14.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -610,7 +610,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.14.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -624,7 +624,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.14.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -659,7 +659,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.15.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -698,7 +698,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.15.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -712,7 +712,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.15.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -747,7 +747,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.16.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -786,7 +786,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.16.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -800,7 +800,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.16.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -835,7 +835,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.17.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -874,7 +874,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.17.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -888,7 +888,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.17.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -923,7 +923,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.18.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -962,7 +962,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.18.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -976,7 +976,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.18.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1011,7 +1011,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.19.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1050,7 +1050,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.19.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1064,7 +1064,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.19.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1099,7 +1099,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.2.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1138,7 +1138,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.2.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1152,7 +1152,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.2.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1187,7 +1187,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.20.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1226,7 +1226,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.20.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1240,7 +1240,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.20.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1275,7 +1275,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.21.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1314,7 +1314,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.21.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1328,7 +1328,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.21.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1363,7 +1363,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.22.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1402,7 +1402,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.22.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1416,7 +1416,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.22.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1451,7 +1451,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.23.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1490,7 +1490,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.23.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1504,7 +1504,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.23.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1539,7 +1539,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.24.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1578,7 +1578,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.24.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1592,7 +1592,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.24.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1627,7 +1627,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.25.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1666,7 +1666,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.25.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1680,7 +1680,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.25.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1715,7 +1715,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.26.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1754,7 +1754,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.26.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1768,7 +1768,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.26.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1803,7 +1803,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.27.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1842,7 +1842,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.27.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1856,7 +1856,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.27.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1891,7 +1891,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.28.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1930,7 +1930,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.28.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -1944,7 +1944,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.28.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -1979,7 +1979,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.29.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2018,7 +2018,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.29.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2032,7 +2032,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.29.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2067,7 +2067,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.3.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2106,7 +2106,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.3.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2120,7 +2120,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.3.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2155,7 +2155,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.30.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2194,7 +2194,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.30.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2208,7 +2208,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.30.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2243,7 +2243,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.31.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2282,7 +2282,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.31.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2296,7 +2296,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.31.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2331,7 +2331,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.4.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2370,7 +2370,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.4.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2384,7 +2384,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.4.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2419,7 +2419,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.5.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2458,7 +2458,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.5.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2472,7 +2472,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.5.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2507,7 +2507,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.6.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2546,7 +2546,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.6.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2560,7 +2560,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.6.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2595,7 +2595,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.7.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2634,7 +2634,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.7.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2648,7 +2648,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.7.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2683,7 +2683,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.8.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2722,7 +2722,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.8.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2736,7 +2736,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.8.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2771,7 +2771,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.9.final_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
@@ -2810,7 +2810,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.9.self_attn.softmax:
- approximation_function: SOFTMAX(base2,float16)
+ approximation_function: NONE
  input_format: SAME
  instance: Softmax
  output_format: SAME
@@ -2824,7 +2824,7 @@ model:
  weight_format: BFP[8|8]{64,-1}(SN)
  weight_sparseness: DENSE
  model.decoder.layers.9.self_attn_layer_norm:
- approximation_function: LAYERNORM(fallback,4,float16)
+ approximation_function: NONE
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm