|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import ast |
|
import importlib |
|
import logging |
|
|
|
import torch |
|
from torch import nn |
|
|
|
from transformer_deploy.QDQModels.ast_operator_patch import Patch2ArgsNode, PatchAdd2ArgsNode, PatchLayer |
|
from transformer_deploy.QDQModels.ast_utils import add_quant_to_module, list_class_to_patch |
|
|
|
|
|
# Minimal nn.Module used as the fixture for the AST-patching tests in this file:
# test_add_quant rewrites it into QDQFakeModel and compares the result with
# expected_class, and test_list_class expects it to be the only class reported.
# NOTE(review): only `#` comments are added here (no docstring, no new annotation) --
# presumably add_quant_to_module parses this class's source, so any new AST node
# would show up in the comparison with expected_class; confirm before changing code.
class FakeModel(nn.Module):
    def __init__(self):
        super().__init__()
        # In expected_class this layer appears as quant_nn.QuantLinear with the same keyword arguments.
        self.linear = nn.Linear(in_features=5, out_features=5, bias=True)

    def forward(self, inputs: torch.Tensor):
        a: torch.Tensor = self.linear(inputs)
        b = torch.ones(a.shape)
        # Two-argument torch.matmul call; expected_class wraps each argument in its own quantizer.
        c = torch.matmul(a, b)
        # nn.LayerNorm applied to an addition; expected_class wraps each operand of the `+`
        # in its own quantizer.
        # NOTE(review): this passes a tensor to the nn.LayerNorm constructor, so the line looks
        # meant as a syntactic pattern only -- none of the tests visible here run forward().
        d = nn.LayerNorm(a + c)
        return d

    def to_skip(self):
        # No layer construction, matmul or LayerNorm call here; expected_class keeps this method unchanged.
        return self.linear
|
|
|
|
|
# Source text that test_add_quant compares, character for character, with
# ast.unparse() of the patched FakeModel. The trailing .strip() drops the newline
# that follows the opening quotes and the one that precedes the closing quotes.
# The layout inside the literal (4-space indentation, one blank line before each
# def) is therefore significant and must not be reformatted.
expected_class = """
class QDQFakeModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear = quant_nn.QuantLinear(in_features=5, out_features=5, bias=True)
        self.matmul_quantizer_0 = TensorQuantizer(quant_nn.QuantLinear.default_quant_desc_input)
        self.matmul_quantizer_1 = TensorQuantizer(quant_nn.QuantLinear.default_quant_desc_input)
        self.layernorm_quantizer_2 = TensorQuantizer(quant_nn.QuantLinear.default_quant_desc_input)
        self.layernorm_quantizer_3 = TensorQuantizer(quant_nn.QuantLinear.default_quant_desc_input)

    def forward(self, inputs: torch.Tensor):
        a: torch.Tensor = self.linear(inputs)
        b = torch.ones(a.shape)
        c = torch.matmul(self.matmul_quantizer_0(a), self.matmul_quantizer_1(b))
        d = nn.LayerNorm(self.layernorm_quantizer_2(a) + self.layernorm_quantizer_3(c))
        return d

    def to_skip(self):
        return self.linear
""".strip()
|
|
|
|
|
def test_list_class():
    """Check that scanning this test module reports ``FakeModel`` as the only class to patch."""
    # The module under inspection is this very file, resolved through its own name.
    this_module = importlib.import_module(name=__name__)
    assert list_class_to_patch(model_module=this_module) == ["FakeModel"]
|
|
|
|
|
def test_add_quant():
    """Patch ``FakeModel`` into ``QDQFakeModel`` and compare the unparsed source with ``expected_class``."""
    patched_tree = ast.fix_missing_locations(
        add_quant_to_module(module_to_patch=FakeModel, new_module_name="QDQFakeModel")
    )
    generated_source = ast.unparse(patched_tree)
    # The generated source is logged before it is compared with the expected text.
    logging.error(generated_source)
    assert generated_source == expected_class
|
|
|
|
|
def test_patch_2_args_node():
    """Check that ``Patch2ArgsNode`` wraps both arguments of ``torch.matmul`` in quantizer calls."""
    matmul_patch = Patch2ArgsNode(op="matmul")
    call_node: ast.AST = ast.parse("torch.matmul(a, b)").body[0].value
    assert matmul_patch.should_patch(call_node)

    quantizer_names = matmul_patch.patch(node=call_node, nb_quant_node=0)

    # The node handed to patch() is the one unparsed here, i.e. it is expected to be modified in place.
    expected_source = "torch.matmul(self.matmul_quantizer_0(a), self.matmul_quantizer_1(b))"
    assert ast.unparse(call_node) == expected_source
    # patch() returns the names of the quantizer attributes it introduced.
    assert quantizer_names == ["matmul_quantizer_0", "matmul_quantizer_1"]
|
|
|
|
|
def test_add_2_args_node():
    """Check that ``PatchAdd2ArgsNode`` wraps both operands of the sum passed to ``nn.LayerNorm``."""
    layernorm_patch = PatchAdd2ArgsNode(op="LayerNorm")
    call_node: ast.AST = ast.parse("nn.LayerNorm(hidden_states + input_tensor)").body[0].value
    assert layernorm_patch.should_patch(call_node)

    quantizer_names = layernorm_patch.patch(node=call_node, nb_quant_node=0)

    # The node handed to patch() is the one unparsed here, i.e. it is expected to be modified in place.
    expected_source = (
        "nn.LayerNorm(self.layernorm_quantizer_0(hidden_states) + self.layernorm_quantizer_1(input_tensor))"
    )
    assert ast.unparse(call_node) == expected_source
    # patch() returns the names of the quantizer attributes it introduced.
    assert quantizer_names == ["layernorm_quantizer_0", "layernorm_quantizer_1"]
|
|
|
|
|
def test_replace_layer():
    """Check that ``PatchLayer`` renames ``nn.Linear`` to ``quant_nn.QuantLinear`` and adds no quantizer."""
    layer_patch = PatchLayer(
        origin_module="nn",
        origin_layer="Linear",
        target_module="quant_nn",
        target_layer="QuantLinear",
    )
    call_node: ast.AST = ast.parse("nn.Linear(config.hidden_size, self.all_head_size)").body[0].value
    assert layer_patch.should_patch(call_node)

    quantizer_names = layer_patch.patch(node=call_node, nb_quant_node=0)

    # Only the callee is expected to change; the call arguments stay as written.
    assert ast.unparse(call_node) == "quant_nn.QuantLinear(config.hidden_size, self.all_head_size)"
    # Replacing a layer is expected to report no new quantizer attribute.
    assert quantizer_names == []
|
|