"""End-to-end tests for the transformer-deploy conversion entry point.

Each test builds an argv-style command list, parses it with parse_args,
and runs the full conversion through main(). Tests that require CUDA are
skipped on CPU-only machines.
"""

import tempfile

import pytest
import torch

from transformer_deploy.convert import main
from transformer_deploy.utils.args import parse_args
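

# Every test in this module follows the same pattern: build an argv-style
# command list, parse it with parse_args, and run the full conversion through
# main(). The helper below is a sketch of how that duplication could be
# factored out; run_conversion is a hypothetical name, not part of the
# transformer_deploy API, and the tests below keep the explicit form so each
# command line reads in full.
def run_conversion(*argv: str) -> None:
    args = parse_args(commands=list(argv))
    main(commands=args)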


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_albert_gpu():
    """Export nreimers/albert-small-v2 to ONNX and TensorRT on GPU."""
    commands = [
        "--model",
        "nreimers/albert-small-v2",
        "--backend",
        "tensorrt",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


def test_minilm_cpu():
    """Export a MiniLM sentiment model to ONNX on CPU (2 threads, short benchmark)."""
    commands = [
        "--model",
        "philschmid/MiniLM-L6-H384-uncased-sst2",
        "--backend",
        "onnx",
        "--batch",
        "1",
        "1",
        "1",
        "--seq-len",
        "8",
        "8",
        "8",
        "--device",
        "cpu",
        "--warmup",
        "5",
        "--nb-measures",
        "10",
        "--nb-threads",
        "2",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_minilm_quantization():
    """Export MiniLM to ONNX with quantization enabled; skipped without a GPU."""
    commands = [
        "--model",
        "philschmid/MiniLM-L6-H384-uncased-sst2",
        "--backend",
        "onnx",
        "--batch",
        "1",
        "1",
        "1",
        "--seq-len",
        "8",
        "8",
        "8",
        "--warmup",
        "5",
        "--nb-measures",
        "10",
        "--nb-threads",
        "2",
        "--quantization",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_camembert_gpu():
    """Export camembert-base to ONNX and TensorRT on GPU."""
    commands = [
        "--model",
        "camembert-base",
        "--backend",
        "tensorrt",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_electra_gpu():
    """Export google/electra-small-discriminator to ONNX and TensorRT on GPU."""
    commands = [
        "--model",
        "google/electra-small-discriminator",
        "--backend",
        "tensorrt",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


def test_sentence_transformers_cpu():
    """Export a sentence-transformers model to ONNX on CPU for the embedding task."""
    commands = [
        "--model",
        "sentence-transformers/all-MiniLM-L6-v2",
        "--backend",
        "onnx",
        "--task",
        "embedding",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--device",
        "cpu",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_gpt2_gpu():
    """Export a tiny GPT-2 to ONNX and TensorRT on GPU for text generation."""
    commands = [
        "--model",
        "sshleifer/tiny-gpt2",
        "--task",
        "text-generation",
        "--backend",
        "onnx",
        "tensorrt",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_bert_ner_gpu():
    """Export a BERT NER model to ONNX for token classification."""
    commands = [
        "--model",
        "kamalkraj/bert-base-cased-ner-conll2003",
        "--task",
        "token-classification",
        "--backend",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_qa_gpu():
    """Export a DistilBERT SQuAD model to ONNX for question answering."""
    commands = [
        "--model",
        "distilbert-base-cased-distilled-squad",
        "--task",
        "question-answering",
        "--backend",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_t5_gpu():
    """Export t5-small to ONNX via the T5-specific generative path."""
    commands = [
        "--model",
        "t5-small",
        "--task",
        "text-generation",
        "--backend",
        "onnx",
        "--seq-len",
        "16",
        "16",
        "16",
        "--output",
        tempfile.mkdtemp(),
        "--generative-model",
        "t5",
        "--nb-measures",
        "5",
    ]
    args = parse_args(commands=commands)
    main(commands=args)
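

# Usage note (the path below is an assumption about where this file lives in
# the repository):
#   pytest tests/test_convert.py -k cpu   # run only the CPU-only tests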