File size: 12,809 Bytes

e0c2d04

#  Copyright 2022, Lefebvre Dalloz Services
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
import inspect
import tempfile
from pathlib import Path

import pytest
from transformers import AutoConfig, AutoTokenizer, PretrainedConfig, PreTrainedTokenizer

from transformer_deploy.t5_utils import t5_model
from transformer_deploy.triton.configuration import EngineType
from transformer_deploy.triton.configuration_decoder import ConfigurationDec
from transformer_deploy.triton.configuration_encoder import ConfigurationEnc
from transformer_deploy.triton.configuration_question_answering import ConfigurationQuestionAnswering
from transformer_deploy.triton.configuration_t5 import ConfigurationT5Decoder, ConfigurationT5Encoder
from transformer_deploy.triton.configuration_token_classifier import ConfigurationTokenClassifier
from transformer_deploy.utils import generative_model, python_tokenizer, question_answering, token_classifier


@pytest.fixture
def working_directory() -> tempfile.TemporaryDirectory:
    return tempfile.TemporaryDirectory()


@pytest.fixture
def conf_encoder(working_directory: tempfile.TemporaryDirectory):
    conf = ConfigurationEnc(
        model_name_base="test",
        dim_output=[-1, 2],
        nb_instance=1,
        tensor_input_names=["input_ids", "attention_mask"],
        working_directory=working_directory.name,
        device="cuda",
    )
    conf.engine_type = EngineType.ONNX  # should be provided later...
    return conf


@pytest.fixture
def conf_decoder(working_directory: tempfile.TemporaryDirectory):
    conf = ConfigurationDec(
        model_name_base="test",
        dim_output=[-1, 2],
        nb_instance=1,
        tensor_input_names=["input_ids", "attention_mask"],
        working_directory=working_directory.name,
        device="cuda",
    )
    conf.engine_type = EngineType.ONNX  # should be provided later...
    return conf


@pytest.fixture
def conf_token_classifier(working_directory: tempfile.TemporaryDirectory):
    conf = ConfigurationTokenClassifier(
        model_name_base="test",
        dim_output=[-1, 2],
        nb_instance=1,
        tensor_input_names=["input_ids", "attention_mask"],
        working_directory=working_directory.name,
        device="cuda",
    )
    conf.engine_type = EngineType.ONNX
    return conf


@pytest.fixture
def conf_question_answering(working_directory: tempfile.TemporaryDirectory):
    conf = ConfigurationQuestionAnswering(
        model_name_base="test",
        dim_output=[-1, 2],
        nb_instance=1,
        tensor_input_names=["input_ids", "attention_mask"],
        working_directory=working_directory.name,
        device="cuda",
    )
    conf.engine_type = EngineType.ONNX
    return conf


@pytest.fixture
def conf_encoder_t5(working_directory: tempfile.TemporaryDirectory):
    conf = ConfigurationT5Encoder(
        model_name_base="test",
        dim_output=[-1, 2],
        nb_instance=1,
        tensor_input_names=["input_ids", "attention_mask"],
        working_directory=working_directory.name,
        device="cuda",
    )
    conf.engine_type = EngineType.ONNX  # should be provided later...
    return conf


@pytest.fixture
def conf_decoder_t5(working_directory: tempfile.TemporaryDirectory):
    conf = ConfigurationT5Decoder(
        model_name_base="test",
        dim_output=[-1, 2],
        nb_instance=1,
        tensor_input_names=["input_ids", "attention_mask"],
        working_directory=working_directory.name,
        device="cuda",
    )
    conf.engine_type = EngineType.ONNX  # should be provided later...
    return conf


def test_model_conf(conf_encoder, conf_decoder, conf_token_classifier):
    expected = """
name: "test_onnx_model"
max_batch_size: 0
platform: "onnxruntime_onnx"
default_model_filename: "model.bin"

input [
{
    name: "input_ids"
    data_type: TYPE_INT32
    dims: [-1, -1]
},
{
    name: "attention_mask"
    data_type: TYPE_INT32
    dims: [-1, -1]
}
]

output {
    name: "output"
    data_type: TYPE_FP32
    dims: [-1, 2]
}

instance_group [
    {
      count: 1
      kind: KIND_GPU
    }
]
"""  # noqa: W293
    assert expected.strip() == conf_encoder.get_model_conf()
    assert expected.strip() == conf_decoder.get_model_conf()
    assert expected.strip() == conf_token_classifier.get_model_conf()


def test_tokenizer_conf(conf_encoder):
    expected = """
name: "test_onnx_tokenize"
max_batch_size: 0
backend: "python"

input [
{
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}
]

output [
{
    name: "input_ids"
    data_type: TYPE_INT32
    dims: [-1, -1]
},
{
    name: "attention_mask"
    data_type: TYPE_INT32
    dims: [-1, -1]
}
]

instance_group [
    {
      count: 1
      kind: KIND_GPU
    }
]
"""  # noqa: W293
    assert expected.strip() == conf_encoder.get_tokenize_conf()


def test_inference_conf(conf_encoder):
    expected = """
name: "test_onnx_inference"
max_batch_size: 0
platform: "ensemble"

input [
{
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}
]

output {
    name: "output"
    data_type: TYPE_FP32
    dims: [-1, 2]
}

ensemble_scheduling {
    step [
        {
            model_name: "test_onnx_tokenize"
            model_version: -1
            input_map {
            key: "TEXT"
            value: "TEXT"
        }
        output_map [
{
    key: "input_ids"
    value: "input_ids"
},
{
    key: "attention_mask"
    value: "attention_mask"
}
        ]
        },
        {
            model_name: "test_onnx_model"
            model_version: -1
            input_map [
{
    key: "input_ids"
    value: "input_ids"
},
{
    key: "attention_mask"
    value: "attention_mask"
}
            ]
        output_map {
                key: "output"
                value: "output"
            }
        }
    ]
}
"""  # noqa: W293
    assert expected.strip() == conf_encoder.get_inference_conf()


def test_generate_conf(conf_decoder):
    expected = """
name: "test_onnx_generate"
max_batch_size: 0
backend: "python"

input [
    {
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }
]

output [
    {
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }
]

instance_group [
    {
      count: 1
      kind: KIND_GPU
    }
]

parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value:"no"
  }
}
"""  # noqa: W293
    print(conf_decoder.get_generation_conf())
    assert expected.strip() == conf_decoder.get_generation_conf()


def test_token_classifier_inference_conf(conf_token_classifier):
    expected = """
name: "test_onnx_inference"
max_batch_size: 0
backend: "python"

input [
    {
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }
]

output [
    {
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }
]

instance_group [
    {
      count: 1
      kind: KIND_GPU
    }
]


parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value:"no"
  }
}
"""
    assert expected.strip() == conf_token_classifier.get_inference_conf()


def test_question_answering_inference_conf(conf_question_answering):
    expected = """
name: "test_onnx_inference"
max_batch_size: 0
backend: "python"

input [
    {
        name: "QUESTION"
        data_type: TYPE_STRING
        dims: [ -1 ]
    },
    {
        name: "CONTEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }
]

output [
    {
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }
]

instance_group [
    {
      count: 1
      kind: KIND_GPU
    }
]


parameters: {
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {
    string_value:"no"
  }
}
"""
    assert expected.strip() == conf_question_answering.get_inference_conf()


def test_t5_encoder_inference_conf(conf_encoder_t5):
    expected = """
name: "test_onnx_inference"
max_batch_size: 0
platform: "ensemble"

input [
{
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}
]

output {
    name: "output"
    data_type: TYPE_FP32
    dims: [-1, 2]
}

ensemble_scheduling {
    step [
        {
            model_name: "test_onnx_tokenize"
            model_version: -1
            input_map {
            key: "TEXT"
            value: "TEXT"
        }
        output_map [
{
    key: "input_ids"
    value: "input_ids"
},
{
    key: "attention_mask"
    value: "attention_mask"
}
        ]
        },
        {
            model_name: "test_onnx_model"
            model_version: -1
            input_map [
{
    key: "input_ids"
    value: "input_ids"
},
{
    key: "attention_mask"
    value: "attention_mask"
}
            ]
        output_map {
                key: "output"
                value: "output"
            }
        }
    ]
}
"""
    assert expected.strip() == conf_encoder_t5.get_inference_conf()


def test_t5_decoder_generate_conf(conf_decoder_t5):
    expected = """
name: "t5_model_generate"
max_batch_size: 0
backend: "python"

input [
{
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}
]

output {
    name: "OUTPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}
instance_group [
    {
      count: 1
      kind: KIND_GPU
    }
]

parameters: {
    key: "FORCE_CPU_ONLY_INPUT_TENSORS"
    value: {
        string_value:"no"
    }
}
"""
    assert expected.strip() == conf_decoder_t5.get_generation_conf()


def test_create_folders(
    conf_encoder,
    conf_decoder,
    conf_token_classifier,
    conf_question_answering,
    conf_encoder_t5,
    conf_decoder_t5,
    working_directory: tempfile.TemporaryDirectory,
):
    fake_model_path = Path(working_directory.name).joinpath("fake_model.bin")
    fake_model_path.write_bytes(b"abc")

    for conf, paths, python_code in [
        (
            conf_encoder,
            [
                conf_encoder.model_folder_name,
                conf_encoder.python_folder_name,
                conf_encoder.inference_folder_name,
            ],
            python_tokenizer,
        ),
        (
            conf_decoder,
            [
                conf_decoder.model_folder_name,
                conf_decoder.python_folder_name,
                conf_decoder.inference_folder_name,
            ],
            generative_model,
        ),
        (
            conf_token_classifier,
            [
                conf_token_classifier.model_folder_name,
                conf_token_classifier.python_folder_name,
                conf_token_classifier.inference_folder_name,
            ],
            token_classifier,
        ),
        (
            conf_question_answering,
            [
                conf_question_answering.model_folder_name,
                conf_question_answering.python_folder_name,
                conf_question_answering.inference_folder_name,
            ],
            question_answering,
        ),
        (
            conf_encoder_t5,
            [
                conf_encoder_t5.model_folder_name,
                conf_encoder_t5.python_folder_name,
                conf_encoder_t5.inference_folder_name,
            ],
            t5_model,
        ),
        (
            conf_decoder_t5,
            [
                conf_decoder_t5.model_folder_name,
                conf_decoder_t5.python_folder_name,
            ],
            t5_model,
        ),
    ]:
        model_name = (
            "t5-small"
            if type(conf) in [ConfigurationT5Decoder, ConfigurationT5Encoder]
            else "philschmid/MiniLM-L6-H384-uncased-sst2"
        )
        tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(model_name)
        config: PretrainedConfig = AutoConfig.from_pretrained(model_name)
        conf.create_configs(tokenizer=tokenizer, config=config, model_path=fake_model_path, engine_type=EngineType.ONNX)
        for folder_name in paths:
            path = Path(conf.working_dir).joinpath(folder_name)
            assert path.joinpath("config.pbtxt").exists()
            assert path.joinpath("config.pbtxt").read_text() != ""
            assert path.joinpath("1").exists()

        model_path = Path(conf.working_dir).joinpath(conf.python_folder_name).joinpath("1").joinpath("model.py")
        assert model_path.exists()
        if type(conf) not in [ConfigurationT5Decoder, ConfigurationT5Encoder]:
            assert model_path.read_text() == inspect.getsource(python_code)