# import onnx
#
# # Load the ONNX model
# model_path = "model_uint8.onnx"  # Replace with the path to your ONNX model
# onnx_model = onnx.load(model_path)
#
# # Print the model's input and output shapes
# for input_tensor in onnx_model.graph.input:
#     print(f"Input Name: {input_tensor.name}")
#     print(
#         f"Input Shape: {[dim.dim_value for dim in input_tensor.type.tensor_type.shape.dim]}"
#     )
# for output_tensor in onnx_model.graph.output:
#     print(f"Output Name: {output_tensor.name}")
#     print(
#         f"Output Shape: {[dim.dim_value for dim in output_tensor.type.tensor_type.shape.dim]}"
#     )
from onnxruntime.quantization import quantize_dynamic, QuantType
# Define the paths to the original ONNX model and the quantized output model
onnx_model_path = "./granite_embedding_model.onnx" # Path to the original ONNX model
quantized_model_path = "./model_uint8.onnx" # Path to save the quantized ONNX model
# Perform dynamic quantization to UInt8
quantize_dynamic(
    model_input=onnx_model_path,  # Input ONNX model path
    model_output=quantized_model_path,  # Output quantized model path
    weight_type=QuantType.QUInt8,  # Use UInt8 for weights
)
# Print confirmation of quantization
print(f"Quantized model saved to {quantized_model_path}")