quantize per channel
Browse files
- config.json +1 -1
- decoder_model_quantized.onnx +2 -2
- decoder_with_past_model_quantized.onnx +2 -2
- encoder_model_quantized.onnx +2 -2
- ort_config.json +3 -3
config.json
CHANGED
@@ -185,5 +185,5 @@
|
|
185 |
"model_type": "vision-encoder-decoder",
|
186 |
"tie_word_embeddings": false,
|
187 |
"torch_dtype": "float32",
|
188 |
-
"transformers_version": "4.
|
189 |
}
|
|
|
185 |
"model_type": "vision-encoder-decoder",
|
186 |
"tie_word_embeddings": false,
|
187 |
"torch_dtype": "float32",
|
188 |
+
"transformers_version": "4.38.1"
|
189 |
}
|
decoder_model_quantized.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9c8f1bb7456441320245595f93fa052612b9f477b71f4822a46c070faf48e8d
|
3 |
+
size 174444105
|
decoder_with_past_model_quantized.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a68ddb99f8e476a6ba4e356145440de0fa70a5d2cff89c136feba9691f27c98
|
3 |
+
size 165923718
|
encoder_model_quantized.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bec906871770afe111f0f3474bd5cb143cfd7f659c242431ee5431be580684d
|
3 |
+
size 82052587
|
ort_config.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"one_external_file": true,
|
3 |
"opset": null,
|
4 |
"optimization": {},
|
5 |
-
"optimum_version": "1.
|
6 |
"quantization": {
|
7 |
"activations_dtype": "QUInt8",
|
8 |
"activations_symmetric": false,
|
@@ -20,7 +20,7 @@
|
|
20 |
"Transpose",
|
21 |
"EmbedLayerNormalization"
|
22 |
],
|
23 |
-
"per_channel":
|
24 |
"qdq_add_pair_to_weight": false,
|
25 |
"qdq_dedicated_pair": false,
|
26 |
"qdq_op_type_per_channel_support_to_axis": {
|
@@ -30,6 +30,6 @@
|
|
30 |
"weights_dtype": "QUInt8",
|
31 |
"weights_symmetric": true
|
32 |
},
|
33 |
-
"transformers_version": "4.
|
34 |
"use_external_data_format": false
|
35 |
}
|
|
|
2 |
"one_external_file": true,
|
3 |
"opset": null,
|
4 |
"optimization": {},
|
5 |
+
"optimum_version": "1.17.1",
|
6 |
"quantization": {
|
7 |
"activations_dtype": "QUInt8",
|
8 |
"activations_symmetric": false,
|
|
|
20 |
"Transpose",
|
21 |
"EmbedLayerNormalization"
|
22 |
],
|
23 |
+
"per_channel": true,
|
24 |
"qdq_add_pair_to_weight": false,
|
25 |
"qdq_dedicated_pair": false,
|
26 |
"qdq_op_type_per_channel_support_to_axis": {
|
|
|
30 |
"weights_dtype": "QUInt8",
|
31 |
"weights_symmetric": true
|
32 |
},
|
33 |
+
"transformers_version": "4.38.1",
|
34 |
"use_external_data_format": false
|
35 |
}
|