"""End-to-end tests for the transformer-deploy conversion entry point.

Each test builds an argv-style command list, parses it with parse_args,
and runs the full conversion through main(). Tests that require CUDA are
skipped on CPU-only machines.
"""

import tempfile

import pytest
import torch

from transformer_deploy.convert import main
from transformer_deploy.utils.args import parse_args
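

# Every test in this module follows the same pattern: build an argv-style
# command list, parse it with parse_args, and run the full conversion through
# main(). The helper below is a sketch of how that duplication could be
# factored out; run_conversion is a hypothetical name, not part of the
# transformer_deploy API, and the tests below keep the explicit form so each
# command line reads in full.
def run_conversion(*argv: str) -> None:
    args = parse_args(commands=list(argv))
    main(commands=args)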


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_albert_gpu():
    """Export nreimers/albert-small-v2 to ONNX and TensorRT on GPU."""
    commands = [
        "--model",
        "nreimers/albert-small-v2",
        "--backend",
        "tensorrt",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


def test_minilm_cpu():
    """Export a MiniLM sentiment model to ONNX on CPU (2 threads, short benchmark)."""
    commands = [
        "--model",
        "philschmid/MiniLM-L6-H384-uncased-sst2",
        "--backend",
        "onnx",
        "--batch",
        "1",
        "1",
        "1",
        "--seq-len",
        "8",
        "8",
        "8",
        "--device",
        "cpu",
        "--warmup",
        "5",
        "--nb-measures",
        "10",
        "--nb-threads",
        "2",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_minilm_quantization():
    """Export MiniLM to ONNX with quantization enabled; skipped without a GPU."""
    commands = [
        "--model",
        "philschmid/MiniLM-L6-H384-uncased-sst2",
        "--backend",
        "onnx",
        "--batch",
        "1",
        "1",
        "1",
        "--seq-len",
        "8",
        "8",
        "8",
        "--warmup",
        "5",
        "--nb-measures",
        "10",
        "--nb-threads",
        "2",
        "--quantization",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_camembert_gpu():
    """Export camembert-base to ONNX and TensorRT on GPU."""
    commands = [
        "--model",
        "camembert-base",
        "--backend",
        "tensorrt",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_electra_gpu():
    """Export google/electra-small-discriminator to ONNX and TensorRT on GPU."""
    commands = [
        "--model",
        "google/electra-small-discriminator",
        "--backend",
        "tensorrt",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


def test_sentence_transformers_cpu():
    """Export a sentence-transformers model to ONNX on CPU for the embedding task."""
    commands = [
        "--model",
        "sentence-transformers/all-MiniLM-L6-v2",
        "--backend",
        "onnx",
        "--task",
        "embedding",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--device",
        "cpu",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_gpt2_gpu():
    """Export a tiny GPT-2 to ONNX and TensorRT on GPU for text generation."""
    commands = [
        "--model",
        "sshleifer/tiny-gpt2",
        "--task",
        "text-generation",
        "--backend",
        "onnx",
        "tensorrt",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_bert_ner_gpu():
    """Export a BERT NER model to ONNX for token classification."""
    commands = [
        "--model",
        "kamalkraj/bert-base-cased-ner-conll2003",
        "--task",
        "token-classification",
        "--backend",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_qa_gpu():
    """Export a DistilBERT SQuAD model to ONNX for question answering."""
    commands = [
        "--model",
        "distilbert-base-cased-distilled-squad",
        "--task",
        "question-answering",
        "--backend",
        "onnx",
        "--batch",
        "1",
        "16",
        "16",
        "--seq-len",
        "8",
        "8",
        "8",
        "--output",
        tempfile.mkdtemp(),
    ]
    args = parse_args(commands=commands)
    main(commands=args)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
def test_t5_gpu():
    """Export t5-small to ONNX via the T5-specific generative path."""
    commands = [
        "--model",
        "t5-small",
        "--task",
        "text-generation",
        "--backend",
        "onnx",
        "--seq-len",
        "16",
        "16",
        "16",
        "--output",
        tempfile.mkdtemp(),
        "--generative-model",
        "t5",
        "--nb-measures",
        "5",
    ]
    args = parse_args(commands=commands)
    main(commands=args)
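

# Usage note (the path below is an assumption about where this file lives in
# the repository):
#   pytest tests/test_convert.py -k cpu   # run only the CPU-only tests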