|
|
|
"""PyTorch Inference Script |
|
|
|
An example inference script that outputs top-k class ids for images in a folder into a csv. |
|
|
|
Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) |
|
""" |
|
import argparse |
|
import json |
|
import logging |
|
import os |
|
import time |
|
from contextlib import suppress |
|
from functools import partial |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import torch |
|
|
|
from timm.data import create_dataset, create_loader, resolve_data_config, ImageNetInfo, infer_imagenet_subset |
|
from timm.layers import apply_test_time_pool |
|
from timm.models import create_model |
|
from timm.utils import AverageMeter, setup_default_logging, set_jit_fuser, ParseKwargs |
|
|
|
# Optional NVIDIA Apex support: `amp` is only bound when the import succeeds.
try:
    from apex import amp
except ImportError:
    has_apex = False
else:
    has_apex = True

# Native AMP is considered available when `torch.cuda.amp.autocast` resolves
# to something other than None; a missing attribute means it is unavailable.
try:
    has_native_amp = getattr(torch.cuda.amp, 'autocast') is not None
except AttributeError:
    has_native_amp = False

# Optional functorch fusion helper (needed by the --aot-autograd option).
try:
    from functorch.compile import memory_efficient_fusion
except ImportError:
    has_functorch = False
else:
    has_functorch = True

# True when this torch build exposes `torch.compile`.
has_compile = hasattr(torch, 'compile')
|
|
|
|
|
# Maps each supported results format name to the file extension that is
# appended to the results filename when that format is written.
_FMT_EXT = {
    'json': '.json',
    'json-record': '.json',
    # Plural alias so that both spellings of the records format resolve to an
    # extension instead of raising KeyError on lookup.
    'json-records': '.json',
    'json-split': '.json',
    'parquet': '.parquet',
    'csv': '.csv',
}
|
|
|
# Module-level logger used for all messages emitted by this script.
_logger = logging.getLogger('inference')

# Turn on cuDNN benchmark mode for the whole process.
torch.backends.cudnn.benchmark = True
|
|
|
|
|
# Command line interface for the inference script. Options are grouped as:
# dataset / input pre-processing, model + runtime, scripting / compilation
# (mutually exclusive), and results output.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Inference')

# -- dataset and input pre-processing ----------------------------------------
parser.add_argument('data', nargs='?', metavar='DIR', const=None,
                    help='path to dataset (*deprecated*, use --data-dir)')
parser.add_argument('--data-dir', metavar='DIR',
                    help='path to dataset (root dir)')
parser.add_argument('--dataset', metavar='NAME', default='',
                    help='dataset type + name ("<type>/<name>") (default: ImageFolder or ImageTar if empty)')
parser.add_argument('--split', metavar='NAME', default='validation',
                    help='dataset split (default: validation)')
parser.add_argument('--model', '-m', metavar='MODEL', default='resnet50',
                    help='model architecture (default: resnet50)')
parser.add_argument('-j', '--workers', default=2, type=int, metavar='N',
                    help='number of data loading workers (default: 2)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--img-size', default=None, type=int,
                    metavar='N', help='Input image dimension, uses model default if empty')
parser.add_argument('--in-chans', type=int, default=None, metavar='N',
                    help='Image input channels (default: None => 3)')
parser.add_argument('--input-size', default=None, nargs=3, type=int,
                    metavar='N N N', help='Input all image dimensions (d h w, e.g. --input-size 3 224 224), uses model default if empty')
parser.add_argument('--use-train-size', action='store_true', default=False,
                    help='force use of train input size, even when test size is specified in pretrained cfg')
parser.add_argument('--crop-pct', default=None, type=float,
                    metavar='N', help='Input image center crop pct')
parser.add_argument('--crop-mode', default=None, type=str,
                    metavar='N', help='Input image crop mode (squash, border, center). Model default if None.')
parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
                    help='Override mean pixel value of dataset')
parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
                    help='Override std deviation of dataset')
parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
                    help='Image resize interpolation type (overrides model)')
parser.add_argument('--num-classes', type=int, default=None,
                    help='Number classes in dataset')
parser.add_argument('--class-map', default='', type=str, metavar='FILENAME',
                    help='path to class to idx mapping file (default: "")')
parser.add_argument('--log-freq', default=10, type=int,
                    metavar='N', help='batch logging frequency (default: 10)')

# -- model and runtime ---------------------------------------------------------
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--num-gpu', type=int, default=1,
                    help='Number of GPUS to use')
parser.add_argument('--test-pool', dest='test_pool', action='store_true',
                    help='enable test time pool')
parser.add_argument('--channels-last', action='store_true', default=False,
                    help='Use channels_last memory layout')
parser.add_argument('--device', default='cuda', type=str,
                    help="Device (accelerator) to use.")
parser.add_argument('--amp', action='store_true', default=False,
                    help='use Native AMP for mixed precision inference')
parser.add_argument('--amp-dtype', default='float16', type=str,
                    help='lower precision AMP dtype (default: float16)')
parser.add_argument('--fuser', default='', type=str,
                    help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
parser.add_argument('--model-kwargs', nargs='*', default={}, action=ParseKwargs)

# -- scripting / compilation: at most one of these may be given ---------------
scripting_group = parser.add_mutually_exclusive_group()
scripting_group.add_argument('--torchscript', default=False, action='store_true',
                             help='torch.jit.script the full model')
scripting_group.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor',
                             help="Enable compilation w/ specified backend (default: inductor).")
scripting_group.add_argument('--aot-autograd', default=False, action='store_true',
                             help="Enable AOT Autograd support.")

# -- results output --------------------------------------------------------------
parser.add_argument('--results-dir', type=str, default=None,
                    help='folder for output results')
parser.add_argument('--results-file', type=str, default=None,
                    help='results filename (relative to results-dir)')
parser.add_argument('--results-format', type=str, nargs='+', default=['csv'],
                    help='results format (one of "csv", "json", "json-split", "parquet")')
parser.add_argument('--results-separate-col', action='store_true', default=False,
                    help='separate output columns per result index.')
parser.add_argument('--topk', default=1, type=int,
                    metavar='N', help='Top-k to output to CSV')
parser.add_argument('--fullname', action='store_true', default=False,
                    help='use full sample name in output (not just basename).')
parser.add_argument('--filename-col', type=str, default='filename',
                    help='name for filename / sample name column')
parser.add_argument('--index-col', type=str, default='index',
                    help='name for output indices column(s)')
parser.add_argument('--label-col', type=str, default='label',
                    help='name for output label column(s)')
parser.add_argument('--output-col', type=str, default=None,
                    help='name for logit/probs output column(s)')
parser.add_argument('--output-type', type=str, default='prob',
                    help='output type column ("prob" for probabilities, "logit" for raw logits)')
parser.add_argument('--label-type', type=str, default='description',
                    help='type of label to output, one of "none", "name", "description", "detail"')
parser.add_argument('--include-index', action='store_true', default=False,
                    help='include the class index in results')
parser.add_argument('--exclude-output', action='store_true', default=False,
                    help='exclude logits/probs from results, just indices. topk must be set !=0.')
|
|
|
|
|
def main():
    """Run batched inference over a dataset and write the predictions to disk.

    Parses the command line, builds the model and data loader, runs the model
    under ``torch.no_grad()`` (optionally with autocast), gathers per-sample
    top-k indices / labels / outputs into a pandas DataFrame, saves it in every
    requested ``--results-format`` and finally prints the frame as JSON to
    stdout after a ``--result`` marker line.
    """
    setup_default_logging()
    args = parser.parse_args()
    # With no checkpoint given, fall back to pretrained weights.
    args.pretrained = args.pretrained or not args.checkpoint

    if torch.cuda.is_available():
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.benchmark = True

    device = torch.device(args.device)

    # Resolve the autocast context: `suppress()` acts as a no-op context manager
    # when AMP is disabled.
    amp_autocast = suppress
    if args.amp:
        assert has_native_amp, 'Please update PyTorch to a version with native AMP (or use APEX).'
        assert args.amp_dtype in ('float16', 'bfloat16')
        amp_dtype = torch.bfloat16 if args.amp_dtype == 'bfloat16' else torch.float16
        amp_autocast = partial(torch.autocast, device_type=device.type, dtype=amp_dtype)
        _logger.info('Running inference in mixed precision with native PyTorch AMP.')
    else:
        _logger.info('Running inference in float32. AMP not enabled.')

    if args.fuser:
        set_jit_fuser(args.fuser)

    # Input channels: explicit --in-chans wins, then the first --input-size
    # entry, otherwise 3.
    in_chans = 3
    if args.in_chans is not None:
        in_chans = args.in_chans
    elif args.input_size is not None:
        in_chans = args.input_size[0]

    model = create_model(
        args.model,
        num_classes=args.num_classes,
        in_chans=in_chans,
        pretrained=args.pretrained,
        checkpoint_path=args.checkpoint,
        **args.model_kwargs,
    )
    if args.num_classes is None:
        assert hasattr(model, 'num_classes'), 'Model must have `num_classes` attr if not set on cmd line/config.'
        args.num_classes = model.num_classes

    _logger.info(
        f'Model {args.model} created, param count: {sum([m.numel() for m in model.parameters()])}')

    data_config = resolve_data_config(vars(args), model=model)
    test_time_pool = False
    if args.test_pool:
        model, test_time_pool = apply_test_time_pool(model, data_config)

    model = model.to(device)
    model.eval()
    if args.channels_last:
        model = model.to(memory_format=torch.channels_last)

    # At most one of these is set (they share a mutually exclusive arg group).
    if args.torchscript:
        model = torch.jit.script(model)
    elif args.torchcompile:
        assert has_compile, 'A version of torch w/ torch.compile() is required for --compile, possibly a nightly.'
        torch._dynamo.reset()
        model = torch.compile(model, backend=args.torchcompile)
    elif args.aot_autograd:
        assert has_functorch, "functorch is needed for --aot-autograd"
        model = memory_efficient_fusion(model)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu)))

    # The deprecated positional `data` argument takes precedence over --data-dir.
    root_dir = args.data or args.data_dir
    dataset = create_dataset(
        root=root_dir,
        name=args.dataset,
        split=args.split,
        class_map=args.class_map,
    )

    if test_time_pool:
        data_config['crop_pct'] = 1.0

    # tfds / wds datasets are restricted to a single loader worker.
    workers = 1 if 'tfds' in args.dataset or 'wds' in args.dataset else args.workers
    loader = create_loader(
        dataset,
        batch_size=args.batch_size,
        use_prefetcher=True,
        num_workers=workers,
        device=device,
        **data_config,
    )

    # Build an index -> label mapper. Both the "detail" and "detailed"
    # spellings select detailed descriptions.
    to_label = None
    detailed_labels = args.label_type in ('detail', 'detailed')
    if detailed_labels or args.label_type in ('name', 'description'):
        imagenet_subset = infer_imagenet_subset(model)
        if imagenet_subset is not None:
            dataset_info = ImageNetInfo(imagenet_subset)
            if args.label_type == 'name':
                to_label = lambda x: dataset_info.index_to_label_name(x)
            elif detailed_labels:
                to_label = lambda x: dataset_info.index_to_description(x, detailed=True)
            else:
                to_label = lambda x: dataset_info.index_to_description(x)
            # Vectorize so the mapper can be applied to whole index arrays.
            to_label = np.vectorize(to_label)
        else:
            _logger.error("Cannot deduce ImageNet subset from model, no labelling will be performed.")

    top_k = min(args.topk, args.num_classes)

    # --exclude-output only makes sense when top-k indices/labels are produced;
    # otherwise keep the outputs so the results are not empty.
    exclude_output = args.exclude_output
    if exclude_output and not top_k:
        _logger.warning('--exclude-output requires --topk != 0, outputs will be included.')
        exclude_output = False

    batch_time = AverageMeter()
    end = time.time()
    all_indices = []
    all_labels = []
    all_outputs = []
    use_probs = args.output_type == 'prob'
    with torch.no_grad():
        for batch_idx, (batch, _) in enumerate(loader):

            with amp_autocast():
                output = model(batch)

            if use_probs:
                output = output.softmax(-1)

            if top_k:
                # Keep only the top-k values; indices feed the optional
                # index and label columns.
                output, indices = output.topk(top_k)
                np_indices = indices.cpu().numpy()
                if args.include_index:
                    all_indices.append(np_indices)
                if to_label is not None:
                    np_labels = to_label(np_indices)
                    all_labels.append(np_labels)

            all_outputs.append(output.cpu().numpy())

            # Measure elapsed time per batch.
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_freq == 0:
                _logger.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                    batch_idx, len(loader), batch_time=batch_time))

    all_indices = np.concatenate(all_indices, axis=0) if all_indices else None
    all_labels = np.concatenate(all_labels, axis=0) if all_labels else None
    all_outputs = np.concatenate(all_outputs, axis=0).astype(np.float32)
    filenames = loader.dataset.filenames(basename=not args.fullname)

    output_col = args.output_col or ('prob' if use_probs else 'logit')
    data_dict = {args.filename_col: filenames}
    if args.results_separate_col and all_outputs.shape[-1] > 1:
        # One column per result position, suffixed with its position.
        if all_indices is not None:
            for i in range(all_indices.shape[-1]):
                data_dict[f'{args.index_col}_{i}'] = all_indices[:, i]
        if all_labels is not None:
            for i in range(all_labels.shape[-1]):
                data_dict[f'{args.label_col}_{i}'] = all_labels[:, i]
        if not exclude_output:
            for i in range(all_outputs.shape[-1]):
                data_dict[f'{output_col}_{i}'] = all_outputs[:, i]
    else:
        # One column per kind; a trailing dimension of size 1 is squeezed so
        # each cell holds a scalar rather than a 1-element array.
        if all_indices is not None:
            if all_indices.shape[-1] == 1:
                all_indices = all_indices.squeeze(-1)
            data_dict[args.index_col] = list(all_indices)
        if all_labels is not None:
            if all_labels.shape[-1] == 1:
                all_labels = all_labels.squeeze(-1)
            data_dict[args.label_col] = list(all_labels)
        if not exclude_output:
            if all_outputs.shape[-1] == 1:
                all_outputs = all_outputs.squeeze(-1)
            data_dict[output_col] = list(all_outputs)

    df = pd.DataFrame(data=data_dict)

    results_filename = args.results_file
    if results_filename:
        filename_no_ext, ext = os.path.splitext(results_filename)
        if ext and ext in _FMT_EXT.values():
            # A known extension was supplied; strip it because save_results()
            # appends the extension for each format.
            results_filename = filename_no_ext
    else:
        # Default filename is based on the model name and input image size.
        img_size = data_config["input_size"][1]
        results_filename = f'{args.model}-{img_size}'

    if args.results_dir:
        # Make sure the output folder exists before writing into it.
        os.makedirs(args.results_dir, exist_ok=True)
        results_filename = os.path.join(args.results_dir, results_filename)

    for fmt in args.results_format:
        # Forward the configured filename column so index-based formats work
        # when --filename-col differs from the default.
        save_results(df, results_filename, fmt, filename_col=args.filename_col)

    print('--result')
    print(df.set_index(args.filename_col).to_json(orient='index', indent=4))
|
|
|
|
|
def save_results(df, results_filename, results_format='csv', filename_col='filename'):
    """Write ``df`` to ``results_filename`` plus the extension for the format.

    Args:
        df: pandas DataFrame holding the results. It must contain
            ``filename_col`` for the 'parquet' and 'json' formats, which use
            that column as the index.
        results_filename: Output path without extension; the extension is
            looked up in ``_FMT_EXT`` and appended.
        results_format: One of the ``_FMT_EXT`` keys. 'json-records' is
            accepted as an alias of 'json-record'. Formats without a dedicated
            writer below are written as CSV.
        filename_col: Name of the column holding the sample names.

    Raises:
        KeyError: If ``results_format`` has no entry in ``_FMT_EXT``.
    """
    # Normalise the plural spelling so both forms select the same extension
    # and the same records writer.
    if results_format == 'json-records':
        results_format = 'json-record'

    results_filename += _FMT_EXT[results_format]
    if results_format == 'parquet':
        df.set_index(filename_col).to_parquet(results_filename)
    elif results_format == 'json':
        df.set_index(filename_col).to_json(results_filename, indent=4, orient='index')
    elif results_format == 'json-record':
        # One JSON object per line.
        df.to_json(results_filename, lines=True, orient='records')
    elif results_format == 'json-split':
        df.to_json(results_filename, indent=4, orient='split', index=False)
    else:
        df.to_csv(results_filename, index=False)
|
|
|
|
|
# Script entry point: only run inference when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|