d-matrix committed
Update configs/BASIC.yaml

configs/BASIC.yaml (+73 -73)
@@ -8,7 +8,7 @@ model:
     weight_format: SAME
     weight_sparseness: DENSE
   model.decoder.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -43,7 +43,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.0.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -82,7 +82,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.0.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -96,7 +96,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.0.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -131,7 +131,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.1.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -170,7 +170,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.1.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -184,7 +184,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.1.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -219,7 +219,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.10.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -258,7 +258,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.10.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -272,7 +272,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.10.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -307,7 +307,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.11.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -346,7 +346,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.11.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -360,7 +360,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.11.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -395,7 +395,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.12.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -434,7 +434,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.12.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -448,7 +448,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.12.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -483,7 +483,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.13.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -522,7 +522,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.13.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -536,7 +536,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.13.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -571,7 +571,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.14.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -610,7 +610,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.14.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -624,7 +624,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.14.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -659,7 +659,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.15.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -698,7 +698,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.15.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -712,7 +712,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.15.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -747,7 +747,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.16.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -786,7 +786,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.16.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -800,7 +800,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.16.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -835,7 +835,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.17.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -874,7 +874,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.17.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -888,7 +888,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.17.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -923,7 +923,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.18.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -962,7 +962,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.18.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -976,7 +976,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.18.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1011,7 +1011,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.19.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1050,7 +1050,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.19.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1064,7 +1064,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.19.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1099,7 +1099,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.2.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1138,7 +1138,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.2.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1152,7 +1152,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.2.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1187,7 +1187,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.20.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1226,7 +1226,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.20.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1240,7 +1240,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.20.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1275,7 +1275,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.21.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1314,7 +1314,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.21.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1328,7 +1328,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.21.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1363,7 +1363,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.22.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1402,7 +1402,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.22.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1416,7 +1416,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.22.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1451,7 +1451,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.23.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1490,7 +1490,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.23.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1504,7 +1504,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.23.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1539,7 +1539,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.3.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1578,7 +1578,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.3.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1592,7 +1592,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.3.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1627,7 +1627,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.4.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1666,7 +1666,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.4.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1680,7 +1680,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.4.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1715,7 +1715,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.5.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1754,7 +1754,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.5.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1768,7 +1768,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.5.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1803,7 +1803,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.6.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1842,7 +1842,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.6.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1856,7 +1856,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.6.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1891,7 +1891,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.7.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1930,7 +1930,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.7.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -1944,7 +1944,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.7.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -1979,7 +1979,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.8.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -2018,7 +2018,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.8.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -2032,7 +2032,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.8.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -2067,7 +2067,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.9.final_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
@@ -2106,7 +2106,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.9.self_attn.softmax:
-    approximation_function:
+    approximation_function: NONE
     input_format: SAME
     instance: Softmax
     output_format: SAME
@@ -2120,7 +2120,7 @@ model:
     weight_format: BFP[8|8]{64,-1}(SN)
     weight_sparseness: DENSE
   model.decoder.layers.9.self_attn_layer_norm:
-    approximation_function:
+    approximation_function: NONE
     bias_format: SAME
     input_format: SAME
     instance: LayerNorm
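For reference, the two affected stanza shapes in configs/BASIC.yaml after this change look roughly as follows, reconstructed from the diff context above. This is a sketch: indentation and any fields outside the visible context lines are assumptions, since each hunk shows only three lines around the change.

  # LayerNorm modules (final_layer_norm, self_attn_layer_norm); shown for the
  # top-level decoder norm. Nesting depth is assumed, not visible in the diff.
  model.decoder.final_layer_norm:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm

  # Softmax modules; shown for layer 0. These stanzas have no bias_format.
  model.decoder.layers.0.self_attn.softmax:
    approximation_function: NONE
    input_format: SAME
    instance: Softmax
    output_format: SAME

The same edit, an empty approximation_function set explicitly to NONE, applies to the final_layer_norm, self_attn.softmax, and self_attn_layer_norm stanzas of decoder layers 0 through 23.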