import json
import os

import onnxruntime as ort
import torch
import torch.nn as nn

from transformers import PretrainedConfig, PreTrainedModel, AutoConfig, AutoModel
from transformers.pipelines import PIPELINE_REGISTRY
from huggingface_hub import hf_hub_download

# 1. register AutoConfig
class ONNXBaseConfig(PretrainedConfig):
    model_type = 'onnx-base'

AutoConfig.register('onnx-base', ONNXBaseConfig)
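
# optional sanity check (a minimal sketch): after registration the Auto API
# can build the config straight from its model_type string
assert isinstance(AutoConfig.for_model('onnx-base', model_path='model.onnx'), ONNXBaseConfig)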

# 2. register AutoModel
class ONNXBaseModel(PreTrainedModel):
    config_class = ONNXBaseConfig
    def __init__(self, config, base_path=None):
        super().__init__(config)
        self.session = None  # set once the ONNX file is available
        if base_path is not None:
            model_path = os.path.join(base_path, config.model_path)
            if os.path.exists(model_path):
                self.session = ort.InferenceSession(model_path)

    def forward(self, input=None, **kwargs):
        # onnxruntime expects numpy inputs; it returns a list of numpy arrays
        outs = self.session.run(None, {'input': input})
        return outs

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        is_local = os.path.isdir(pretrained_model_name_or_path)
        if is_local:
            base_path = pretrained_model_name_or_path
        else:
            # download config.json and the ONNX weights from the Hub; both land
            # in the same snapshot directory of the local cache, so the cached
            # config's dirname can serve as base_path
            config_path = hf_hub_download(repo_id=pretrained_model_name_or_path, filename='config.json')
            base_path = os.path.dirname(config_path)
            hf_hub_download(repo_id=pretrained_model_name_or_path, filename=config.model_path)
        return cls(config, base_path=base_path)

    @property
    def device(self):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        return torch.device(device)

AutoModel.register(ONNXBaseConfig, ONNXBaseModel)
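
# with config and model registered, AutoModel.from_pretrained() dispatches to
# ONNXBaseModel.from_pretrained for any checkpoint whose config.json declares
# model_type 'onnx-base'. A sketch, assuming './custom_model' exists (it is
# created by the save steps below):
# model = AutoModel.from_pretrained('./custom_model')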

# option: build a config and save it to a local path
local_model_path = './custom_model'
config = ONNXBaseConfig(model_path='model.onnx',
                        id2label={0: 'label_0', 1: 'label_1'},
                        label2id={'label_0': 0, 'label_1': 1})
# model.onnx is exported further below, so this instance keeps session=None;
# it only illustrates direct construction
model = ONNXBaseModel(config, base_path=local_model_path)
config.save_pretrained(local_model_path)

# make sure the saved config.json carries model_type so the Auto classes can
# dispatch on it (and drop transformers_version, which is not needed here)
config_path = os.path.join(local_model_path, 'config.json')
with open(config_path, 'r') as f:
    config_data = json.load(f)
config_data['model_type'] = 'onnx-base'
config_data.pop('transformers_version', None)
with open(config_path, 'w') as f:
    json.dump(config_data, f, indent=2)
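
# after the patch, config.json should look roughly like this (note that
# id2label keys become strings in JSON):
# {
#   "id2label": {"0": "label_0", "1": "label_1"},
#   "label2id": {"label_0": 0, "label_1": 1},
#   "model_path": "model.onnx",
#   "model_type": "onnx-base"
# }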

# save onnx
dummy_input = torch.tensor([[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]], dtype=torch.float32)
onnx_file_path = os.path.join(local_model_path, 'model.onnx')

class ZeroModel(nn.Module):
    """Stand-in network that returns an all-zeros tensor of the input's shape."""
    def forward(self, x):
        return torch.zeros_like(x)

zero_model = ZeroModel()
torch.onnx.export(zero_model, dummy_input, onnx_file_path,
                  input_names=['input'], output_names=['output'],
                  dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})
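
# optional check (a minimal sketch): run the exported graph once with
# onnxruntime; the zero model should return all zeros in the input's shape
_session = ort.InferenceSession(onnx_file_path)
_out = _session.run(None, {'input': dummy_input.numpy()})[0]
assert _out.shape == tuple(dummy_input.shape) and not _out.any()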


# 3. register Pipeline
from transformers.pipelines import Pipeline

class ONNXBasePipeline(Pipeline):
    def __init__(self, model, **kwargs):
        self.device_id = kwargs.get('device')  # remember the requested device
        super().__init__(model=model, **kwargs)

    def _sanitize_parameters(self, **kwargs):
        # no extra parameters: route nothing to preprocess/_forward/postprocess
        return {}, {}, {}

    def preprocess(self, input):
        # wrap the raw numpy array into the kwargs expected by model.forward
        return {'input': input}

    def _forward(self, model_input):
        with torch.no_grad():
            outputs = self.model(**model_input)
        return outputs

    def postprocess(self, model_outputs):
        # pass the raw onnxruntime outputs through unchanged
        return model_outputs

PIPELINE_REGISTRY.register_pipeline(
    task='onnx-base',
    pipeline_class=ONNXBasePipeline,
    pt_model=ONNXBaseModel
)
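
# from here on, pipeline(task='onnx-base', ...) builds an ONNXBasePipeline and,
# given a model name or path, loads it through the registered ONNXBaseModel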

# 4. show how to use
from transformers import pipeline

pipe = pipeline(
    task='onnx-base',
    model='m3/onnx-base',
    batch_size=10,
    device='cuda',
)

dummy_input = torch.tensor([[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]], dtype=torch.float32)
input_data = dummy_input.numpy()
# the device was fixed at construction time; the pipeline takes the input directly
result = pipe(input_data)
print(result)
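
# expected output: the zero model echoes the input shape with zeros, so this
# prints a list holding a (1, 1, 3, 3) numpy array of zeros (exact nesting may
# vary with the pipeline's batching)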