# Source: OSUM / wenet/bin/export_ipex.py
# Last commit: 568e264 by tomxxie - "适配zeroGPU" (adapt to ZeroGPU)
# Copyright (C) 2021-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from __future__ import print_function
import argparse
import logging
import os
import torch
import yaml
from wenet.utils.init_model import init_model
import intel_extension_for_pytorch as ipex
from intel_extension_for_pytorch.quantization import prepare, convert
def get_args():
    """Parse the command-line options of the export script.

    Options are read from ``sys.argv``.

    Returns:
        argparse.Namespace: carries ``config``, ``checkpoint``,
        ``output_file``, ``dtype`` and ``output_quant_file``.
    """
    cli = argparse.ArgumentParser(description='export your script model')

    # Mandatory inputs: both share the same shape, only flag/help differ.
    for flag, text in (('--config', 'config file'),
                       ('--checkpoint', 'checkpoint model')):
        cli.add_argument(flag, required=True, help=text)

    # Optional outputs and the precision selector.
    cli.add_argument('--output_file', default=None, help='output file')
    cli.add_argument('--dtype',
                     default="fp32",
                     help='choose the dtype to run:[fp32,bf16]')
    cli.add_argument('--output_quant_file',
                     default=None,
                     help='output quantized model file')

    return cli.parse_args()
def scripting(model):
    """Compile ``model`` to TorchScript and freeze it.

    The model is scripted with ``torch.jit.script`` and then frozen with
    ``torch.jit.freeze``; both steps run under ``torch.inference_mode()``.

    Args:
        model: the module to script. It is expected to expose every name
            listed in ``kept_names`` below.

    Returns:
        The frozen scripted module.
    """
    # Names that must stay reachable on the frozen module (passed to
    # ``torch.jit.freeze`` as ``preserved_attrs``).
    kept_names = [
        "forward_encoder_chunk",
        "ctc_activation",
        "forward_attention_decoder",
        "subsampling_rate",
        "right_context",
        "sos_symbol",
        "eos_symbol",
        "is_bidirectional_decoder",
    ]
    with torch.inference_mode():
        scripted = torch.jit.script(model)
        frozen = torch.jit.freeze(scripted, preserved_attrs=kept_names)
    return frozen
def main():
    """Export a WeNet model as IPEX-optimized TorchScript file(s).

    Reads the options from ``get_args()``, loads the YAML config, builds the
    model with ``init_model`` and then, depending on the options:

    * ``--output_file``: scripts the ``ipex.optimize``-d model (under CPU
      autocast when ``--dtype bf16``) and saves it to that path.
    * ``--output_quant_file``: runs IPEX dynamic quantization
      (``prepare`` + ``convert``) on the model, scripts the result and saves
      it to that path.

    Raises:
        ValueError: if ``--output_file`` is given together with a ``--dtype``
            other than ``fp32`` or ``bf16``.
    """
    args = get_args()
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')

    # Fail fast with a clear message: without this check an unsupported dtype
    # left ``ipex_model``/``script_model`` unbound and the export crashed with
    # an UnboundLocalError only after the whole model had been loaded.
    # The quantized export does not depend on ``--dtype``, so it is only
    # enforced when the plain export is requested.
    supported_dtypes = ("fp32", "bf16")
    if args.output_file and args.dtype not in supported_dtypes:
        raise ValueError("unsupported --dtype '{}', expected one of {}".format(
            args.dtype, supported_dtypes))

    # No need gpu for model export
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # NOTE(review): yaml.load with FullLoader is kept as-is; the config is
    # assumed to be a trusted local file. Prefer yaml.safe_load if configs
    # can ever come from an untrusted source.
    with open(args.config, 'r') as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)
    model, configs = init_model(args, configs)
    print(model)

    # Apply IPEX optimization
    model.eval()
    torch._C._jit_set_texpr_fuser_enabled(False)
    model.to(memory_format=torch.channels_last)
    if args.dtype == "fp32":
        ipex_model = ipex.optimize(model)
    elif args.dtype == "bf16":  # For Intel 4th generation Xeon (SPR)
        ipex_model = ipex.optimize(model,
                                   dtype=torch.bfloat16,
                                   weights_prepack=False)

    # Export jit torch script model
    if args.output_file:
        if args.dtype == "fp32":
            script_model = scripting(ipex_model)
        else:  # "bf16" -- guaranteed by the dtype check above
            torch._C._jit_set_autocast_mode(True)
            with torch.cpu.amp.autocast():
                script_model = scripting(ipex_model)
        script_model.save(args.output_file)
        print('Export model successfully, see {}'.format(args.output_file))

    # Export quantized jit torch script model
    if args.output_quant_file:
        dynamic_qconfig = ipex.quantization.default_dynamic_qconfig
        # Example inputs handed to ``prepare``.
        # NOTE(review): presumably mirrors the positional arguments of the
        # model's forward (features, two ints, two cache tensors) -- confirm
        # the shapes against the model configuration.
        dummy_data = (torch.zeros(1, 67, 80), 16, -16,
                      torch.zeros(12, 4, 32, 128), torch.zeros(12, 1, 256, 7))
        # Quantization starts from ``model``, not from ``ipex_model``.
        model = prepare(model, dynamic_qconfig, dummy_data)
        model = convert(model)
        script_quant_model = scripting(model)
        script_quant_model.save(args.output_quant_file)
        print('Export quantized model successfully, '
              'see {}'.format(args.output_quant_file))
# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()