|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
import torch
|
|
|
|
from models import SynthesizerTrn
|
|
import utils
|
|
|
|
# onnxruntime is required for the post-export sanity check below;
# fail fast with a clear message instead of a bare ImportError traceback.
try:
    import onnxruntime as ort
except ImportError:
    print('Please install onnxruntime!')
    sys.exit(1)
|
|
|
|
|
|
def to_numpy(tensor):
    """Convert a torch tensor to a NumPy array, detached from autograd.

    Tensors that track gradients are moved to CPU first so that calling
    ``.numpy()`` is always legal.
    """
    detached = tensor.detach()
    if tensor.requires_grad:
        detached = detached.cpu()
    return detached.numpy()
|
|
|
|
|
|
def get_args():
    """Parse command-line arguments for the ONNX export script.

    Returns:
        argparse.Namespace with ``checkpoint``, ``cfg``, ``onnx_model``
        and ``providers`` attributes.
    """
    parser = argparse.ArgumentParser(description='export onnx model')
    parser.add_argument('--checkpoint', required=True, help='checkpoint')
    parser.add_argument('--cfg', required=True, help='config file')
    parser.add_argument('--onnx_model', required=True, help='onnx model name')
    parser.add_argument(
        '--providers',
        required=False,
        default='CPUExecutionProvider',
        choices=['CUDAExecutionProvider', 'CPUExecutionProvider'],
        # Was 'the model to send request to' — a copy-paste from another
        # script; this flag actually selects the onnxruntime backend.
        help='onnxruntime execution provider for the export sanity check')
    args = parser.parse_args()
    return args
|
|
|
|
|
|
def get_data_from_cfg(cfg_path: str):
    """Read the JSON model config and extract vocabulary/speaker sizes.

    Args:
        cfg_path: path to the JSON configuration file.

    Returns:
        Tuple ``(phone_num, speaker_num)``: the number of phoneme symbols
        and the number of speakers declared in the config.

    Raises:
        FileNotFoundError: if ``cfg_path`` is not an existing file.
        KeyError: if the expected config entries are missing.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not os.path.isfile(cfg_path):
        raise FileNotFoundError(f'config file not found: {cfg_path}')
    with open(cfg_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    symbols = data["symbols"]
    speaker_num = data["data"]["n_speakers"]
    return len(symbols), speaker_num
|
|
|
|
|
|
def main():
    """Export a trained SynthesizerTrn checkpoint to ONNX and verify it.

    Loads the checkpoint, swaps in the export-friendly forward, traces the
    model with `torch.onnx.export` (dynamic batch / sequence axes), then
    runs the exported graph with onnxruntime and reports how far its output
    deviates from the PyTorch output.
    """
    args = get_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    hps = utils.get_hparams_from_file(args.cfg)

    phone_num, num_speakers = get_data_from_cfg(args.cfg)
    net_g = SynthesizerTrn(phone_num,
                           hps.data.filter_length // 2 + 1,
                           hps.train.segment_size // hps.data.hop_length,
                           n_speakers=num_speakers,
                           **hps.model)
    utils.load_checkpoint(args.checkpoint, net_g, None)
    # Trace the ONNX-compatible forward instead of the training forward.
    net_g.forward = net_g.export_forward
    net_g.eval()

    # Dummy tracing inputs: a random 10-phoneme sequence and its length.
    seq = torch.randint(low=0, high=phone_num, size=(1, 10), dtype=torch.long)
    seq_len = torch.IntTensor([seq.size(1)]).long()

    # Inference scale factors (presumably noise_scale, length_scale,
    # noise_scale_w — TODO confirm ordering against export_forward).
    scales = torch.FloatTensor([0.667, 1.0, 0.8])
    scales = scales.unsqueeze(0)
    sid = torch.IntTensor([0]).long()

    dummy_input = (seq, seq_len, scales, sid)
    torch.onnx.export(model=net_g,
                      args=dummy_input,
                      f=args.onnx_model,
                      input_names=['input', 'input_lengths', 'scales', 'sid'],
                      output_names=['output'],
                      dynamic_axes={
                          'input': {
                              0: 'batch',
                              1: 'phonemes'
                          },
                          'input_lengths': {
                              0: 'batch'
                          },
                          'scales': {
                              0: 'batch'
                          },
                          'sid': {
                              0: 'batch'
                          },
                          'output': {
                              0: 'batch',
                              1: 'audio',
                              2: 'audio_length'
                          }
                      },
                      opset_version=13,
                      verbose=False)

    # Sanity check: run the same inputs through PyTorch and onnxruntime.
    # no_grad avoids building an autograd graph during the reference pass.
    with torch.no_grad():
        torch_output = net_g(seq, seq_len, scales, sid)
    providers = [args.providers]
    ort_sess = ort.InferenceSession(args.onnx_model, providers=providers)
    ort_inputs = {
        'input': to_numpy(seq),
        'input_lengths': to_numpy(seq_len),
        'scales': to_numpy(scales),
        'sid': to_numpy(sid),
    }
    onnx_output = ort_sess.run(None, ort_inputs)
    # Previously `onnx_output` was computed but never used, so a broken
    # export went unnoticed. Report the max absolute deviation instead.
    torch_audio = torch_output[0] if isinstance(torch_output,
                                                tuple) else torch_output
    max_diff = abs(to_numpy(torch_audio) - onnx_output[0]).max()
    print(f'max abs diff between torch and onnx outputs: {max_diff}')
|
|
|
|
|
|
# Script entry point: only run the export when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
|
|
|