#  Copyright 2022, Lefebvre Dalloz Services
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""
Shared functions related to benchmarks.
"""

import logging
import time
from contextlib import contextmanager
from typing import Dict, List, Union

import numpy as np
import torch


def print_timings(name: str, timings: List[float]) -> None:
    """
    Format and print inference latencies.

    :param name: inference engine name
    :param timings: latencies measured during the inference
    """
    mean_time = 1e3 * np.mean(timings)
    std_time = 1e3 * np.std(timings)
    min_time = 1e3 * np.min(timings)
    max_time = 1e3 * np.max(timings)
    median, percent_95_time, percent_99_time = 1e3 * np.percentile(timings, [50, 95, 99])
    print(
        f"[{name}] "
        f"mean={mean_time:.2f}ms, "
        f"sd={std_time:.2f}ms, "
        f"min={min_time:.2f}ms, "
        f"max={max_time:.2f}ms, "
        f"median={median:.2f}ms, "
        f"95p={percent_95_time:.2f}ms, "
        f"99p={percent_99_time:.2f}ms"
    )


def setup_logging(level: int = logging.INFO) -> None:
    """
    Set up the root Python logger.

    :param level: logging level
    """
    logging.basicConfig(format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=level)


@contextmanager
def track_infer_time(buffer: List[float]) -> None:
    """
    A context manager that measures the latency of the wrapped block.

    :param buffer: a list where the measured latency (in seconds) is appended
    """
    start = time.perf_counter()
    yield
    end = time.perf_counter()
    buffer.append(end - start)
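
# Illustrative usage (a sketch, not part of the original module): `model` and
# `inputs` below are placeholders for a real model and one of its input batches.
#
#     timings: List[float] = []
#     with track_infer_time(timings):
#         _ = model(**inputs)
#     print_timings(name="pytorch", timings=timings)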


def generate_input(
    seq_len: int, batch_size: int, input_names: List[str], device: str = "cuda"
) -> Dict[str, torch.Tensor]:
    """
    Generate dummy inputs.

    :param seq_len: number of tokens per input
    :param batch_size: first dimension of each tensor (number of sequences per batch)
    :param input_names: names of the input tensors to generate
    :param device: where to store the tensors (PyTorch only), one of [cpu, cuda]
    :return: a dict of PyTorch tensors keyed by input name
    """
    assert device in ["cpu", "cuda"]
    shape = (batch_size, seq_len)
    inputs_pytorch: Dict[str, torch.Tensor] = {
        name: torch.ones(size=shape, dtype=torch.int32, device=device) for name in input_names
    }
    return inputs_pytorch


def generate_multiple_inputs(
    seq_len: int, batch_size: int, input_names: List[str], nb_inputs_to_gen: int, device: str
) -> List[Dict[str, torch.Tensor]]:
    """
    Generate multiple batches of dummy inputs.

    :param seq_len: sequence length to generate
    :param batch_size: number of sequences per batch to generate
    :param input_names: tensor input names to generate
    :param nb_inputs_to_gen: number of batches of sequences to generate
    :param device: one of [cpu, cuda]
    :return: generated sequences
    """
    all_inputs_pytorch: List[Dict[str, torch.Tensor]] = list()
    for _ in range(nb_inputs_to_gen):
        inputs_pytorch = generate_input(seq_len=seq_len, batch_size=batch_size, input_names=input_names, device=device)
        all_inputs_pytorch.append(inputs_pytorch)
    return all_inputs_pytorch


def to_numpy(tensors: List[Union[np.ndarray, torch.Tensor]]) -> np.ndarray:
    """
    Convert a list of torch / numpy tensors to a single numpy array.

    :param tensors: list of torch tensors, numpy arrays, or nested lists/tuples of them
    :return: numpy array stacking the converted tensors
    """
    if isinstance(tensors[0], torch.Tensor):
        pytorch_output = [t.detach().cpu().numpy() for t in tensors]
    elif isinstance(tensors[0], np.ndarray):
        pytorch_output = tensors
    elif isinstance(tensors[0], (tuple, list)):
        pytorch_output = [to_numpy(t) for t in tensors]
    else:
        raise TypeError(f"unknown tensor type: {type(tensors[0])}")
    return np.asarray(pytorch_output)


def compare_outputs(pytorch_output: np.ndarray, engine_output: np.ndarray) -> float:
    """
    Compare two model outputs by computing the mean absolute difference between them.

    :param pytorch_output: reference output
    :param engine_output: other engine output
    :return: difference between outputs as a single float
    """
    return float(np.mean(np.abs(pytorch_output - engine_output)))
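

# Minimal end-to-end sketch (not part of the original module): exercises the
# helpers above on CPU, with a dummy softmax standing in for a real model
# forward pass. Shapes, tensor names and the number of batches are arbitrary.
if __name__ == "__main__":
    setup_logging()
    latencies: List[float] = []
    outputs: List[torch.Tensor] = []
    for batch in generate_multiple_inputs(
        seq_len=16, batch_size=2, input_names=["input_ids"], nb_inputs_to_gen=20, device="cpu"
    ):
        with track_infer_time(latencies):
            # stand-in for a model forward pass
            outputs.append(torch.softmax(batch["input_ids"].float(), dim=-1))
    print_timings(name="dummy model (cpu)", timings=latencies)
    reference = to_numpy(outputs)
    # comparing a run against itself should report a difference of 0.0
    logging.info(f"mean abs diff: {compare_outputs(reference, reference)}")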