#
# QAT_quantizer.py
# YOLOv6
#
# Created by Meituan on 2022/06/24.
# Copyright © 2022
#
from absl import logging
from pytorch_quantization import nn as quant_nn
from pytorch_quantization import quant_modules
# Call this function before defining the model
def tensorrt_official_qat():
    # Quantization Aware Training is based on the Straight Through Estimator (STE) derivative approximation.
    # It is sometimes known as “quantization aware training”.
    # PyTorch-Quantization is a toolkit for training and evaluating PyTorch models with simulated quantization.
    # Quantization can be added to the model automatically, or manually (see manual_quant_conv_example below
    # for a manual sketch), allowing the model to be tuned for accuracy and performance.
    # Quantization is compatible with NVIDIA's high-performance integer kernels, which leverage integer Tensor Cores.
    # The quantized model can be exported to ONNX and imported by TensorRT 8.0 and later.
    # https://github.com/NVIDIA/TensorRT/blob/main/tools/pytorch-quantization/examples/finetune_quant_resnet50.ipynb
    # The example to export the model to ONNX (a runnable sketch appears at the bottom of this file):
    # model.eval()
    # quant_nn.TensorQuantizer.use_fb_fake_quant = True  # We have to shift to PyTorch's fake-quant ops before exporting the model to ONNX
    # opset_version = 13
    # Export ONNX for multiple batch sizes
    # print("Creating ONNX file: " + onnx_filename)
    # dummy_input = torch.randn(batch_onnx, 3, 224, 224, device='cuda')  # TODO: switch input dims by model
    # torch.onnx.export(model, dummy_input, onnx_filename, verbose=False, opset_version=opset_version, enable_onnx_checker=False, do_constant_folding=True)
    try:
        quant_modules.initialize()
    except NameError:
        logging.info("initialization error for quant_modules")
# def QAT_quantizer():
#     coming soon
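
# NOTE: hedged end-to-end sketch, not part of the original file. It illustrates
# the intended call order: patch the layers before the model is defined, build
# and fine-tune the model, then shift to PyTorch's fake-quant ops and export
# ONNX for TensorRT 8.0+. The torchvision model and the 224x224 input shape are
# placeholder assumptions, not YOLOv6 specifics.
if __name__ == "__main__":
    import torch
    import torchvision.models as models

    tensorrt_official_qat()           # must run before the model is constructed
    model = models.resnet50().cuda()  # conv/linear layers are now quant_nn modules
    # ... run calibration and QAT fine-tuning here before exporting ...

    model.eval()
    quant_nn.TensorQuantizer.use_fb_fake_quant = True  # shift to fake-quant ops for export
    dummy_input = torch.randn(1, 3, 224, 224, device="cuda")
    torch.onnx.export(model, dummy_input, "model_qat.onnx",
                      opset_version=13, do_constant_folding=True)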