d-matrix committed
Commit 8f5ab24 (verified) · Parent: fbd0599

Update configs/BASIC.yaml

Files changed (1): configs/BASIC.yaml (+73, -73)
configs/BASIC.yaml CHANGED
@@ -8,7 +8,7 @@ model:
   weight_format: SAME
   weight_sparseness: DENSE
   model.decoder.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -43,7 +43,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.0.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -82,7 +82,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.0.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -96,7 +96,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.0.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -131,7 +131,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.1.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -170,7 +170,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.1.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -184,7 +184,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.1.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -219,7 +219,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.10.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -258,7 +258,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.10.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -272,7 +272,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.10.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -307,7 +307,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.11.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -346,7 +346,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.11.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -360,7 +360,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.11.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -395,7 +395,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.12.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -434,7 +434,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.12.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -448,7 +448,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.12.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -483,7 +483,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.13.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -522,7 +522,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.13.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -536,7 +536,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.13.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -571,7 +571,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.14.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -610,7 +610,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.14.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -624,7 +624,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.14.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -659,7 +659,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.15.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -698,7 +698,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.15.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -712,7 +712,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.15.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -747,7 +747,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.16.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -786,7 +786,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.16.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -800,7 +800,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.16.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -835,7 +835,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.17.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -874,7 +874,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.17.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -888,7 +888,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.17.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -923,7 +923,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.18.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -962,7 +962,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.18.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -976,7 +976,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.18.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1011,7 +1011,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.19.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1050,7 +1050,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.19.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1064,7 +1064,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.19.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1099,7 +1099,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.2.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1138,7 +1138,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.2.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1152,7 +1152,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.2.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1187,7 +1187,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.20.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1226,7 +1226,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.20.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1240,7 +1240,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.20.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1275,7 +1275,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.21.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1314,7 +1314,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.21.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1328,7 +1328,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.21.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1363,7 +1363,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.22.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1402,7 +1402,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.22.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1416,7 +1416,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.22.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1451,7 +1451,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.23.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1490,7 +1490,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.23.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1504,7 +1504,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.23.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1539,7 +1539,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.3.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1578,7 +1578,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.3.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1592,7 +1592,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.3.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1627,7 +1627,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.4.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1666,7 +1666,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.4.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1680,7 +1680,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.4.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1715,7 +1715,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.5.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1754,7 +1754,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.5.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1768,7 +1768,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.5.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1803,7 +1803,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.6.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1842,7 +1842,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.6.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1856,7 +1856,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.6.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1891,7 +1891,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.7.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1930,7 +1930,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.7.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -1944,7 +1944,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.7.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -1979,7 +1979,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.8.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -2018,7 +2018,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.8.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -2032,7 +2032,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.8.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -2067,7 +2067,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.9.final_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
@@ -2106,7 +2106,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.9.self_attn.softmax:
-  approximation_function: SOFTMAX(base2,float16)
+  approximation_function: NONE
   input_format: SAME
   instance: Softmax
   output_format: SAME
@@ -2120,7 +2120,7 @@ model:
   weight_format: BFP[8|8]{64,-1}(SN)
   weight_sparseness: DENSE
   model.decoder.layers.9.self_attn_layer_norm:
-  approximation_function: LAYERNORM(fallback,4,float16)
+  approximation_function: NONE
   bias_format: SAME
   input_format: SAME
   instance: LayerNorm
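
All 73 hunks make the same change: every `approximation_function` in the config, that is, the `LAYERNORM(fallback,4,float16)` entries on the LayerNorm modules and the `SOFTMAX(base2,float16)` entries on the Softmax modules, across `model.decoder.final_layer_norm` and decoder layers 0 through 23, is reset to `NONE`, presumably so these ops run without an approximation. A minimal sketch for double-checking the result, assuming PyYAML and only the key layout visible in the hunks above (the script and its names are illustrative, not part of this repo's tooling):

```python
# Illustrative check (assumes PyYAML; structure inferred from the diff above).
import yaml

with open("configs/BASIC.yaml") as f:
    cfg = yaml.safe_load(f)

# The diff shows a top-level `model:` mapping whose keys are dotted module
# names (e.g. model.decoder.layers.0.self_attn.softmax) and whose values are
# per-module settings, including `approximation_function`.
leftovers = {
    name: settings["approximation_function"]
    for name, settings in cfg["model"].items()
    if isinstance(settings, dict)
    and settings.get("approximation_function") not in (None, "NONE")
}

# After this commit, nothing should be left over: every module's
# approximation_function is NONE.
print(leftovers or "all approximation_function entries are NONE")
```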