wellness10
/

QUANTIZED_OCTOPUSV2_ONNX

Text Generation

Inference Endpoints

Model card | Files and versions | Community

QUANTIZED_OCTOPUSV2_ONNX / ort_config.json

RadAlienware's picture

Upload folder using huggingface_hub

d3497d5 verified 8 months ago

history blame contribute delete

834 Bytes

{
  "one_external_file": true,
  "opset": null,
  "optimization": {},
  "optimum_version": "1.20.0.dev0",
  "quantization": {
    "activations_dtype": "QUInt8",
    "activations_symmetric": false,
    "format": "QOperator",
    "is_static": false,
    "mode": "IntegerOps",
    "nodes_to_exclude": [],
    "nodes_to_quantize": [],
    "operators_to_quantize": [
      "Conv",
      "MatMul",
      "Attention",
      "LSTM",
      "Gather",
      "Transpose",
      "EmbedLayerNormalization"
    ],
    "per_channel": false,
    "qdq_add_pair_to_weight": false,
    "qdq_dedicated_pair": false,
    "qdq_op_type_per_channel_support_to_axis": {
      "MatMul": 1
    },
    "reduce_range": false,
    "weights_dtype": "QInt8",
    "weights_symmetric": true
  },
  "transformers_version": "4.40.1",
  "use_external_data_format": true
}