# Copyright 2022, Lefebvre Dalloz Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tempfile

import pytest
import torch

from transformer_deploy.convert import main
from transformer_deploy.utils.args import parse_args


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_albert_gpu():
    # end-to-end conversion of a small ALBERT checkpoint to both TensorRT and ONNX
    commands = [
        "--model", "nreimers/albert-small-v2",
        "--backend", "tensorrt", "onnx",
        "--batch", "1", "16", "16",
        "--seq-len", "8", "8", "8",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


def test_minilm_cpu():
    # CPU-only ONNX conversion and benchmark, runnable without a GPU
    commands = [
        "--model", "philschmid/MiniLM-L6-H384-uncased-sst2",
        "--backend", "onnx",
        "--batch", "1", "1", "1",
        "--seq-len", "8", "8", "8",
        "--device", "cpu",
        "--warmup", "5",
        "--nb-measures", "10",
        "--nb-threads", "2",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_minilm_quantization():
    # same model as above, this time with quantization enabled on GPU
    commands = [
        "--model", "philschmid/MiniLM-L6-H384-uncased-sst2",
        "--backend", "onnx",
        "--batch", "1", "1", "1",
        "--seq-len", "8", "8", "8",
        "--warmup", "5",
        "--nb-measures", "10",
        "--nb-threads", "2",
        "--quantization",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_camembert_gpu():
    commands = [
        "--model", "camembert-base",
        "--backend", "tensorrt", "onnx",
        "--batch", "1", "16", "16",
        "--seq-len", "8", "8", "8",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_electra_gpu():
    commands = [
        "--model", "google/electra-small-discriminator",
        "--backend", "tensorrt", "onnx",
        "--batch", "1", "16", "16",
        "--seq-len", "8", "8", "8",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


# Known failure recorded on a previous run:
# FAILED tests/test_models.py::test_sentence_transformers_cpu - RuntimeError: Error in execution: Non-zero status
# code returned while running EmbedLayerNormalization node. Name:'EmbedLayerNormalization_0' Status
# Message: input_ids and position_ids shall have same shape
def test_sentence_transformers_cpu():
    # exports a sentence-transformers model for the embedding task on CPU
    commands = [
        "--model", "sentence-transformers/all-MiniLM-L6-v2",
        "--backend", "onnx",
        "--task", "embedding",
        "--batch", "1", "16", "16",
        "--seq-len", "8", "8", "8",
        "--device", "cpu",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_gpt2_gpu():
    # decoder-only text generation on a tiny GPT-2 checkpoint
    commands = [
        "--model", "sshleifer/tiny-gpt2",
        "--task", "text-generation",
        "--backend", "onnx", "tensorrt",
        "--batch", "1", "16", "16",
        "--seq-len", "8", "8", "8",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_bert_ner_gpu():
    commands = [
        "--model", "kamalkraj/bert-base-cased-ner-conll2003",
        "--task", "token-classification",
        "--backend", "onnx",
        "--batch", "1", "16", "16",
        "--seq-len", "8", "8", "8",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_qa_gpu():
    commands = [
        "--model", "distilbert-base-cased-distilled-squad",
        "--task", "question-answering",
        "--backend", "onnx",
        "--batch", "1", "16", "16",
        "--seq-len", "8", "8", "8",
        "--output", tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_t5_gpu():
    # encoder-decoder path: T5 exercises the generative-model conversion
    commands = [
        "--model", "t5-small",
        "--task", "text-generation",
        "--backend", "onnx",
        "--seq-len", "16", "16", "16",
        "--output", tempfile.mkdtemp(),
        "--generative-model", "t5",
        "--nb-measures", "5",
    ]
    args = parse_args(commands=commands)
    main(commands=args)
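

# A note on invocation (our suggestion, not part of the original suite): on a
# machine without CUDA, the @pytest.mark.skipif guards skip every GPU test, so
# a CPU-only box can still exercise the ONNX CPU paths. Assuming pytest is
# installed, something like:
#
#   pytest tests/test_models.py            # run everything available
#   pytest tests/test_models.py -k "cpu"   # only the CPU-targeted tests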