""" |
|
Shared functions related to benchmarks. |
|
""" |
|
|
|
import logging
import time
from contextlib import contextmanager
from typing import Dict, List, Union

import numpy as np
import torch
|
|
|
|
|
def print_timings(name: str, timings: List[float]) -> None:
    """
    Format and print inference latencies.

    :param name: inference engine name
    :param timings: latencies measured during the inference, in seconds
    """
    mean_time = 1e3 * np.mean(timings)
    std_time = 1e3 * np.std(timings)
    min_time = 1e3 * np.min(timings)
    max_time = 1e3 * np.max(timings)
    median, percent_95_time, percent_99_time = 1e3 * np.percentile(timings, [50, 95, 99])
    print(
        f"[{name}] "
        f"mean={mean_time:.2f}ms, "
        f"sd={std_time:.2f}ms, "
        f"min={min_time:.2f}ms, "
        f"max={max_time:.2f}ms, "
        f"median={median:.2f}ms, "
        f"95p={percent_95_time:.2f}ms, "
        f"99p={percent_99_time:.2f}ms"
    )
|
|
|
|
|
def setup_logging(level: int = logging.INFO) -> None:
    """
    Configure the root Python logger.

    :param level: logging level (e.g., logging.INFO)
    """
    logging.basicConfig(format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=level)
|
|
|
|
|
@contextmanager
def track_infer_time(buffer: List[float]) -> None:
    """
    A context manager to measure the latency of the wrapped block.

    :param buffer: list to which the measured latency (in seconds) is appended
    """
    start = time.perf_counter()
    yield
    end = time.perf_counter()
    buffer.append(end - start)
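# Usage sketch (illustrative, not part of the benchmark API): collect one latency per
# inference call, then summarize them with print_timings. `model` and `all_inputs` are
# placeholders for whatever engine and data are being benchmarked.
#
#     timings: List[float] = []
#     for inputs in all_inputs:
#         with track_infer_time(timings):
#             model(**inputs)
#     print_timings(name="pytorch", timings=timings)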
|
|
|
|
|
def generate_input(
    seq_len: int, batch_size: int, input_names: List[str], device: str = "cuda"
) -> Dict[str, torch.Tensor]:
    """
    Generate dummy inputs.

    :param seq_len: number of tokens per input
    :param batch_size: first dimension of the tensor
    :param input_names: tensor input names to generate
    :param device: where to store tensors (PyTorch only). One of [cpu, cuda]
    :return: a dictionary of PyTorch tensors keyed by input name
    """
    assert device in ["cpu", "cuda"]
    shape = (batch_size, seq_len)
    inputs_pytorch: Dict[str, torch.Tensor] = {
        name: torch.ones(size=shape, dtype=torch.int32, device=device) for name in input_names
    }
    return inputs_pytorch
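# For example, generate_input(seq_len=16, batch_size=4, input_names=["input_ids", "attention_mask"], device="cpu")
# returns two (4, 16) int32 tensors filled with ones, keyed by the given input names.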
|
|
|
|
|
def generate_multiple_inputs(
    seq_len: int, batch_size: int, input_names: List[str], nb_inputs_to_gen: int, device: str
) -> List[Dict[str, torch.Tensor]]:
    """
    Generate multiple dummy inputs.

    :param seq_len: sequence length to generate
    :param batch_size: number of sequences per batch to generate
    :param input_names: tensor input names to generate
    :param nb_inputs_to_gen: number of batches of sequences to generate
    :param device: one of [cpu, cuda]
    :return: generated sequences
    """
    all_inputs_pytorch: List[Dict[str, torch.Tensor]] = list()
    for _ in range(nb_inputs_to_gen):
        inputs_pytorch = generate_input(seq_len=seq_len, batch_size=batch_size, input_names=input_names, device=device)
        all_inputs_pytorch.append(inputs_pytorch)
    return all_inputs_pytorch
|
|
|
|
|
def to_numpy(tensors: List[Union[np.ndarray, torch.Tensor]]) -> np.ndarray:
    """
    Convert a list of torch / NumPy tensors to a single NumPy array.

    :param tensors: list of torch / NumPy tensors (possibly nested in tuples or lists)
    :return: NumPy array
    """
    if isinstance(tensors[0], torch.Tensor):
        pytorch_output = [t.detach().cpu().numpy() for t in tensors]
    elif isinstance(tensors[0], np.ndarray):
        pytorch_output = tensors
    elif isinstance(tensors[0], (tuple, list)):
        # nested structure: convert each inner sequence recursively
        pytorch_output = [to_numpy(t) for t in tensors]
    else:
        raise Exception(f"unknown tensor type: {type(tensors[0])}")
    return np.asarray(pytorch_output)
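# Usage sketch (illustrative): stack per-batch outputs kept as torch tensors into one array.
#
#     outputs = [torch.randn(4, 128) for _ in range(8)]
#     arr = to_numpy(outputs)  # ndarray of shape (8, 4, 128), dtype float32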
|
|
|
|
|
def compare_outputs(pytorch_output: np.ndarray, engine_output: np.ndarray) -> float:
    """
    Compare 2 model outputs by computing the mean of absolute value difference between them.

    :param pytorch_output: reference output
    :param engine_output: other engine output
    :return: difference between outputs as a single float
    """
    return np.mean(np.abs(pytorch_output - engine_output))
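

# Minimal usage sketch (not part of the original benchmark scripts): when the module is run
# directly, time a toy computation standing in for a real model, print latency statistics,
# and compare the PyTorch result with a NumPy re-implementation via compare_outputs.
if __name__ == "__main__":
    setup_logging()
    inputs = generate_multiple_inputs(
        seq_len=16, batch_size=4, input_names=["input_ids"], nb_inputs_to_gen=10, device="cpu"
    )
    timings: List[float] = []
    torch_outputs: List[torch.Tensor] = []
    for batch in inputs:
        with track_infer_time(timings):
            # stand-in for a real model call
            torch_outputs.append(batch["input_ids"].float().mean(dim=1))
    print_timings(name="toy-pytorch", timings=timings)

    torch_array = to_numpy(torch_outputs)
    numpy_array = np.stack([b["input_ids"].numpy().astype(np.float32).mean(axis=1) for b in inputs])
    logging.info(f"mean abs diff: {compare_outputs(torch_array, numpy_array):.6f}")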
|
|