# Copyright 2022, Lefebvre Dalloz Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
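"""Tests for the Triton Inference Server configuration generators.

Each Configuration class should render the expected config.pbtxt protobuf text
for its task (encoder, decoder, token classification, question answering,
T5 encoder/decoder), and create_configs should lay out a complete Triton
model repository on disk.
"""
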
import inspect
import tempfile
from pathlib import Path

import pytest
from transformers import AutoConfig, AutoTokenizer, PretrainedConfig, PreTrainedTokenizer

from transformer_deploy.t5_utils import t5_model
from transformer_deploy.triton.configuration import EngineType
from transformer_deploy.triton.configuration_decoder import ConfigurationDec
from transformer_deploy.triton.configuration_encoder import ConfigurationEnc
from transformer_deploy.triton.configuration_question_answering import ConfigurationQuestionAnswering
from transformer_deploy.triton.configuration_t5 import ConfigurationT5Decoder, ConfigurationT5Encoder
from transformer_deploy.triton.configuration_token_classifier import ConfigurationTokenClassifier
from transformer_deploy.utils import generative_model, python_tokenizer, question_answering, token_classifier


@pytest.fixture
def working_directory() -> tempfile.TemporaryDirectory:
    # yield (instead of return) so the temporary directory is cleaned up after each test
    directory = tempfile.TemporaryDirectory()
    yield directory
    directory.cleanup()


# One fixture per task type; all share the same dummy output shape and tensor input names.
@pytest.fixture
def conf_encoder(working_directory: tempfile.TemporaryDirectory):
conf = ConfigurationEnc(
model_name_base="test",
dim_output=[-1, 2],
nb_instance=1,
tensor_input_names=["input_ids", "attention_mask"],
working_directory=working_directory.name,
device="cuda",
)
    conf.engine_type = EngineType.ONNX  # engine type is normally provided later, during model conversion
    return conf


@pytest.fixture
def conf_decoder(working_directory: tempfile.TemporaryDirectory):
conf = ConfigurationDec(
model_name_base="test",
dim_output=[-1, 2],
nb_instance=1,
tensor_input_names=["input_ids", "attention_mask"],
working_directory=working_directory.name,
device="cuda",
)
    conf.engine_type = EngineType.ONNX  # engine type is normally provided later, during model conversion
    return conf


@pytest.fixture
def conf_token_classifier(working_directory: tempfile.TemporaryDirectory):
conf = ConfigurationTokenClassifier(
model_name_base="test",
dim_output=[-1, 2],
nb_instance=1,
tensor_input_names=["input_ids", "attention_mask"],
working_directory=working_directory.name,
device="cuda",
)
conf.engine_type = EngineType.ONNX
    return conf


@pytest.fixture
def conf_question_answering(working_directory: tempfile.TemporaryDirectory):
conf = ConfigurationQuestionAnswering(
model_name_base="test",
dim_output=[-1, 2],
nb_instance=1,
tensor_input_names=["input_ids", "attention_mask"],
working_directory=working_directory.name,
device="cuda",
)
conf.engine_type = EngineType.ONNX
    return conf


@pytest.fixture
def conf_encoder_t5(working_directory: tempfile.TemporaryDirectory):
conf = ConfigurationT5Encoder(
model_name_base="test",
dim_output=[-1, 2],
nb_instance=1,
tensor_input_names=["input_ids", "attention_mask"],
working_directory=working_directory.name,
device="cuda",
)
    conf.engine_type = EngineType.ONNX  # engine type is normally provided later, during model conversion
    return conf


@pytest.fixture
def conf_decoder_t5(working_directory: tempfile.TemporaryDirectory):
conf = ConfigurationT5Decoder(
model_name_base="test",
dim_output=[-1, 2],
nb_instance=1,
tensor_input_names=["input_ids", "attention_mask"],
working_directory=working_directory.name,
device="cuda",
)
    conf.engine_type = EngineType.ONNX  # engine type is normally provided later, during model conversion
    return conf


def test_model_conf(conf_encoder, conf_decoder, conf_token_classifier):
expected = """
name: "test_onnx_model"
max_batch_size: 0
platform: "onnxruntime_onnx"
default_model_filename: "model.bin"
input [
{
name: "input_ids"
data_type: TYPE_INT32
dims: [-1, -1]
},
{
name: "attention_mask"
data_type: TYPE_INT32
dims: [-1, -1]
}
]
output {
name: "output"
data_type: TYPE_FP32
dims: [-1, 2]
}
instance_group [
{
count: 1
kind: KIND_GPU
}
]
""" # noqa: W293
assert expected.strip() == conf_encoder.get_model_conf()
assert expected.strip() == conf_decoder.get_model_conf()
    assert expected.strip() == conf_token_classifier.get_model_conf()


def test_tokenizer_conf(conf_encoder):
expected = """
name: "test_onnx_tokenize"
max_batch_size: 0
backend: "python"
input [
{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output [
{
name: "input_ids"
data_type: TYPE_INT32
dims: [-1, -1]
},
{
name: "attention_mask"
data_type: TYPE_INT32
dims: [-1, -1]
}
]
instance_group [
{
count: 1
kind: KIND_GPU
}
]
""" # noqa: W293
    assert expected.strip() == conf_encoder.get_tokenize_conf()


def test_inference_conf(conf_encoder):
expected = """
name: "test_onnx_inference"
max_batch_size: 0
platform: "ensemble"
input [
{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output {
name: "output"
data_type: TYPE_FP32
dims: [-1, 2]
}
ensemble_scheduling {
step [
{
model_name: "test_onnx_tokenize"
model_version: -1
input_map {
key: "TEXT"
value: "TEXT"
}
output_map [
{
key: "input_ids"
value: "input_ids"
},
{
key: "attention_mask"
value: "attention_mask"
}
]
},
{
model_name: "test_onnx_model"
model_version: -1
input_map [
{
key: "input_ids"
value: "input_ids"
},
{
key: "attention_mask"
value: "attention_mask"
}
]
output_map {
key: "output"
value: "output"
}
}
]
}
""" # noqa: W293
    assert expected.strip() == conf_encoder.get_inference_conf()


def test_generate_conf(conf_decoder):
expected = """
name: "test_onnx_generate"
max_batch_size: 0
backend: "python"
input [
{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output [
{
name: "output"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
instance_group [
{
count: 1
kind: KIND_GPU
}
]
parameters: {
key: "FORCE_CPU_ONLY_INPUT_TENSORS"
value: {
string_value:"no"
}
}
""" # noqa: W293
    assert expected.strip() == conf_decoder.get_generation_conf()


def test_token_classifier_inference_conf(conf_token_classifier):
expected = """
name: "test_onnx_inference"
max_batch_size: 0
backend: "python"
input [
{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output [
{
name: "output"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
instance_group [
{
count: 1
kind: KIND_GPU
}
]
parameters: {
key: "FORCE_CPU_ONLY_INPUT_TENSORS"
value: {
string_value:"no"
}
}
"""
    assert expected.strip() == conf_token_classifier.get_inference_conf()


def test_question_answering_inference_conf(conf_question_answering):
expected = """
name: "test_onnx_inference"
max_batch_size: 0
backend: "python"
input [
{
name: "QUESTION"
data_type: TYPE_STRING
dims: [ -1 ]
},
{
name: "CONTEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output [
{
name: "output"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
instance_group [
{
count: 1
kind: KIND_GPU
}
]
parameters: {
key: "FORCE_CPU_ONLY_INPUT_TENSORS"
value: {
string_value:"no"
}
}
"""
    assert expected.strip() == conf_question_answering.get_inference_conf()


def test_t5_encoder_inference_conf(conf_encoder_t5):
expected = """
name: "test_onnx_inference"
max_batch_size: 0
platform: "ensemble"
input [
{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output {
name: "output"
data_type: TYPE_FP32
dims: [-1, 2]
}
ensemble_scheduling {
step [
{
model_name: "test_onnx_tokenize"
model_version: -1
input_map {
key: "TEXT"
value: "TEXT"
}
output_map [
{
key: "input_ids"
value: "input_ids"
},
{
key: "attention_mask"
value: "attention_mask"
}
]
},
{
model_name: "test_onnx_model"
model_version: -1
input_map [
{
key: "input_ids"
value: "input_ids"
},
{
key: "attention_mask"
value: "attention_mask"
}
]
output_map {
key: "output"
value: "output"
}
}
]
}
"""
    assert expected.strip() == conf_encoder_t5.get_inference_conf()


def test_t5_decoder_generate_conf(conf_decoder_t5):
expected = """
name: "t5_model_generate"
max_batch_size: 0
backend: "python"
input [
{
name: "TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output {
name: "OUTPUT_TEXT"
data_type: TYPE_STRING
dims: [ -1 ]
}
instance_group [
{
count: 1
kind: KIND_GPU
}
]
parameters: {
key: "FORCE_CPU_ONLY_INPUT_TENSORS"
value: {
string_value:"no"
}
}
"""
    assert expected.strip() == conf_decoder_t5.get_generation_conf()


def test_create_folders(
conf_encoder,
conf_decoder,
conf_token_classifier,
conf_question_answering,
conf_encoder_t5,
conf_decoder_t5,
working_directory: tempfile.TemporaryDirectory,
):
fake_model_path = Path(working_directory.name).joinpath("fake_model.bin")
fake_model_path.write_bytes(b"abc")
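    # every configuration must lay out a full Triton model repository:
    # one folder per sub-model, each holding a config.pbtxt and a version directory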
for conf, paths, python_code in [
(
conf_encoder,
[
conf_encoder.model_folder_name,
conf_encoder.python_folder_name,
conf_encoder.inference_folder_name,
],
python_tokenizer,
),
(
conf_decoder,
[
conf_decoder.model_folder_name,
conf_decoder.python_folder_name,
conf_decoder.inference_folder_name,
],
generative_model,
),
(
conf_token_classifier,
[
conf_token_classifier.model_folder_name,
conf_token_classifier.python_folder_name,
conf_token_classifier.inference_folder_name,
],
token_classifier,
),
(
conf_question_answering,
[
conf_question_answering.model_folder_name,
conf_question_answering.python_folder_name,
conf_question_answering.inference_folder_name,
],
question_answering,
),
(
conf_encoder_t5,
[
conf_encoder_t5.model_folder_name,
conf_encoder_t5.python_folder_name,
conf_encoder_t5.inference_folder_name,
],
t5_model,
),
(
conf_decoder_t5,
[
conf_decoder_t5.model_folder_name,
conf_decoder_t5.python_folder_name,
],
t5_model,
),
]:
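        # T5 configurations need a seq2seq checkpoint; the others use a small classification model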
        model_name = (
            "t5-small"
            if isinstance(conf, (ConfigurationT5Decoder, ConfigurationT5Encoder))
            else "philschmid/MiniLM-L6-H384-uncased-sst2"
        )
tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(model_name)
config: PretrainedConfig = AutoConfig.from_pretrained(model_name)
conf.create_configs(tokenizer=tokenizer, config=config, model_path=fake_model_path, engine_type=EngineType.ONNX)
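        # each generated folder must contain a non-empty config.pbtxt and a "1" version directory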
for folder_name in paths:
path = Path(conf.working_dir).joinpath(folder_name)
assert path.joinpath("config.pbtxt").exists()
assert path.joinpath("config.pbtxt").read_text() != ""
assert path.joinpath("1").exists()
model_path = Path(conf.working_dir).joinpath(conf.python_folder_name).joinpath("1").joinpath("model.py")
assert model_path.exists()
        if not isinstance(conf, (ConfigurationT5Decoder, ConfigurationT5Encoder)):
assert model_path.read_text() == inspect.getsource(python_code)