quantize per channel

Files changed (5) hide show

config.json CHANGED Viewed

@@ -185,5 +185,5 @@
   "model_type": "vision-encoder-decoder",
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.35.2"
 }

   "model_type": "vision-encoder-decoder",
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.38.1"
 }

decoder_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b79247a2b0ba7d349987de3e462f42e395f9d7d0989c968975977e3a97563e85
-size 173838810

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9c8f1bb7456441320245595f93fa052612b9f477b71f4822a46c070faf48e8d
+size 174444105

decoder_with_past_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b027b072abe7165445dcec03f8492b9201fdfb2e196ed870febbdbe8508329e
-size 165366466

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a68ddb99f8e476a6ba4e356145440de0fa70a5d2cff89c136feba9691f27c98
+size 165923718

encoder_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:febe91664da233b0b71a61217a84c7d5d9b5db6d7991eec0e761742fd4c4d5ee
-size 81514092

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bec906871770afe111f0f3474bd5cb143cfd7f659c242431ee5431be580684d
+size 82052587

ort_config.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "one_external_file": true,
   "opset": null,
   "optimization": {},
-  "optimum_version": "1.14.1",
   "quantization": {
     "activations_dtype": "QUInt8",
     "activations_symmetric": false,
@@ -20,7 +20,7 @@
       "Transpose",
       "EmbedLayerNormalization"
     ],
-    "per_channel": false,
     "qdq_add_pair_to_weight": false,
     "qdq_dedicated_pair": false,
     "qdq_op_type_per_channel_support_to_axis": {
@@ -30,6 +30,6 @@
     "weights_dtype": "QUInt8",
     "weights_symmetric": true
   },
-  "transformers_version": "4.35.2",
   "use_external_data_format": false
 }

   "one_external_file": true,
   "opset": null,
   "optimization": {},
+  "optimum_version": "1.17.1",
   "quantization": {
     "activations_dtype": "QUInt8",
     "activations_symmetric": false,
       "Transpose",
       "EmbedLayerNormalization"
     ],
+    "per_channel": true,
     "qdq_add_pair_to_weight": false,
     "qdq_dedicated_pair": false,
     "qdq_op_type_per_channel_support_to_axis": {
     "weights_dtype": "QUInt8",
     "weights_symmetric": true
   },
+  "transformers_version": "4.38.1",
   "use_external_data_format": false
 }