File size: 4,533 Bytes
e0c2d04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#  Copyright 2022, Lefebvre Dalloz Services
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""
Command line args parser
"""

import argparse
from typing import List, Optional


def parse_args(commands: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Parse command line arguments.

    Only ``-m/--model`` is mandatory; every other option has a default.
    Arguments that the parser does not know are silently ignored
    (``parse_known_args`` is used), they do not raise an error.

    :param commands: to provide the command line programmatically (list of tokens).
        When `None`, argparse reads the arguments from `sys.argv[1:]`.
    :return: parsed command line
    :raises SystemExit: raised by argparse when `--model` is missing, when a value is
        invalid (wrong type, not in `choices`, wrong count) or when `-h/--help` is requested
    """
    parser = argparse.ArgumentParser(
        description="optimize and deploy transformers", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("-m", "--model", required=True, help="path to model or URL to Hugging Face hub")
    parser.add_argument("-t", "--tokenizer", help="path to tokenizer or URL to Hugging Face hub")
    parser.add_argument(
        "--task",
        default="classification",
        choices=["classification", "embedding", "text-generation", "token-classification", "question-answering"],
        # help must name the value exactly as it appears in `choices` ("embedding", singular)
        help="task to manage. embedding is for sentence-transformers models",
    )
    parser.add_argument(
        "--generative-model",
        default="gpt",
        choices=["gpt", "t5"],
        help="which model to use for text generation. Models supported are: GPT and T5",
    )
    # No `type=` here: the value always reaches the caller as a string, so `True` arrives as "True".
    # NOTE(review): presumably the caller converts "True"/"None" strings itself - confirm.
    parser.add_argument(
        "--auth-token",
        default=None,
        help=(
            "Hugging Face Hub auth token. Set to `None` (default) for public models. "
            "For private models, use `True` to use local cached token, or a string of your HF API token"
        ),
    )
    # Both flags below write to `load_external_data`; the last one given on the command line wins.
    parser.add_argument(
        "--load-external-data",
        default=False,
        help="whether to load external data. It may be used for loading large models (> 2 Gb).",
        action="store_true",
    )
    parser.add_argument("--no-load-external-data", dest="load_external_data", action="store_false")
    parser.add_argument(
        "-b",
        "--batch-size",
        default=[1, 1, 1],
        help="batch sizes to optimize for (min, optimal, max). Used by TensorRT and benchmarks.",
        type=int,
        nargs=3,
    )
    parser.add_argument(
        "-s",
        "--seq-len",
        default=[16, 16, 16],
        help="sequence lengths to optimize for (min, optimal, max). Used by TensorRT and benchmarks.",
        type=int,
        nargs=3,
    )
    parser.add_argument("-q", "--quantization", action="store_true", help="INT-8 GPU quantization support")
    parser.add_argument("-w", "--workspace-size", default=10000, help="workspace size in MiB (TensorRT)", type=int)
    # NOTE(review): the original help text was truncated ("name to be used for ");
    # presumably this is the folder receiving the generated models - confirm against the caller.
    parser.add_argument("-o", "--output", default="triton_models", help="name to be used for the output folder")
    parser.add_argument("-n", "--name", default="transformer", help="model name to be used in triton server")
    parser.add_argument("-v", "--verbose", action="store_true", help="display detailed information")
    parser.add_argument("--fast", action="store_true", help="skip the Pytorch (FP16) benchmark")
    parser.add_argument(
        "--backend",
        default=["onnx"],
        help="backend to use. multiple args accepted.",
        nargs="*",
        choices=["onnx", "tensorrt"],
    )
    parser.add_argument(
        "-d",
        "--device",
        default=None,
        help="device to use. If not set, will be cuda if available.",
        choices=["cpu", "cuda"],
    )
    parser.add_argument("--nb-threads", default=1, help="# of CPU threads to use for inference", type=int)
    parser.add_argument(
        "--nb-instances", default=1, help="# of model instances, may improve throughput (Triton)", type=int
    )
    parser.add_argument("--warmup", default=10, help="# of inferences to warm each model", type=int)
    parser.add_argument("--nb-measures", default=1000, help="# of inferences for benchmarks", type=int)
    parser.add_argument("--seed", default=123, help="seed for random inputs, etc.", type=int)
    parser.add_argument("--atol", default=3e-1, help="tolerance when comparing outputs to Pytorch ones", type=float)
    # parse_known_args: unrecognized tokens are returned separately and deliberately dropped here
    args, _ = parser.parse_known_args(args=commands)
    return args