import os.path as osp
import warnings
from math import inf

import torch.distributed as dist
from torch.nn.modules.batchnorm import _BatchNorm
from torch.utils.data import DataLoader

from annotator.uniformer.mmcv.fileio import FileClient
from annotator.uniformer.mmcv.utils import is_seq_of
from .hook import Hook
from .logger import LoggerHook


class EvalHook(Hook):
    """Non-distributed evaluation hook.

    This hook will regularly perform evaluation at a given interval when
    running in a non-distributed environment.

    Args:
        dataloader (DataLoader): A PyTorch dataloader, whose dataset has
            implemented the ``evaluate`` function.
        start (int | None, optional): Evaluation starting epoch. It enables
            evaluation before the training starts if ``start`` <= the resuming
            epoch. If None, whether to evaluate is merely decided by
            ``interval``. Default: None.
        interval (int): Evaluation interval. Default: 1.
        by_epoch (bool): Determines whether to perform evaluation by epoch or
            by iteration. If set to True, it will perform evaluation by epoch;
            otherwise, by iteration. Default: True.
        save_best (str, optional): If a metric is specified, it would measure
            the best checkpoint during evaluation. The information about the
            best checkpoint would be saved in ``runner.meta['hook_msgs']`` to
            keep the best score value and best checkpoint path, which will
            also be loaded when resuming a checkpoint. Options are the
            evaluation metrics on the test dataset, e.g., ``bbox_mAP`` and
            ``segm_mAP`` for bbox detection and instance segmentation, or
            ``AR@100`` for proposal recall. If ``save_best`` is ``auto``, the
            first key of the returned ``OrderedDict`` result will be used.
            Default: None.
        rule (str | None, optional): Comparison rule for best score. If set to
            None, it will infer a reasonable rule. Keys such as 'acc', 'top',
            etc. will be inferred by the 'greater' rule. Keys containing
            'loss' will be inferred by the 'less' rule. Options are 'greater',
            'less', None. Default: None.
        test_fn (callable, optional): Tests a model with samples from a
            dataloader and returns the test results. If ``None``, the default
            test function ``mmcv.engine.single_gpu_test`` will be used.
            (default: ``None``)
        greater_keys (List[str] | None, optional): Metric keys that will be
            inferred by the 'greater' comparison rule. If ``None``,
            ``_default_greater_keys`` will be used. (default: ``None``)
        less_keys (List[str] | None, optional): Metric keys that will be
            inferred by the 'less' comparison rule. If ``None``,
            ``_default_less_keys`` will be used. (default: ``None``)
        out_dir (str, optional): The root directory to save checkpoints. If
            not specified, ``runner.work_dir`` will be used by default. If
            specified, the ``out_dir`` will be the concatenation of
            ``out_dir`` and the last level directory of ``runner.work_dir``.
            `New in version 1.3.16.`
        file_client_args (dict): Arguments to instantiate a FileClient.
            See :class:`mmcv.fileio.FileClient` for details. Default: None.
            `New in version 1.3.16.`
        **eval_kwargs: Evaluation arguments fed into the evaluate function of
            the dataset.

    Note:
        If new arguments are added for EvalHook, tools/test.py and
        tools/eval_metric.py may be affected.
    """
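
    # Class-level defaults used by ``_init_rule`` to infer the comparison
    # rule for ``save_best``: keys matching ``_default_greater_keys`` use the
    # 'greater' rule and keys matching ``_default_less_keys`` use the 'less'
    # rule. Downstream code may override these lists for its own metrics.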
    rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y}
    init_value_map = {'greater': -inf, 'less': inf}
    _default_greater_keys = [
        'acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU',
        'mAcc', 'aAcc'
    ]
    _default_less_keys = ['loss']

    def __init__(self,
                 dataloader,
                 start=None,
                 interval=1,
                 by_epoch=True,
                 save_best=None,
                 rule=None,
                 test_fn=None,
                 greater_keys=None,
                 less_keys=None,
                 out_dir=None,
                 file_client_args=None,
                 **eval_kwargs):
        if not isinstance(dataloader, DataLoader):
            raise TypeError(f'dataloader must be a pytorch DataLoader, '
                            f'but got {type(dataloader)}')

        if interval <= 0:
            raise ValueError(f'interval must be a positive number, '
                             f'but got {interval}')

        assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean'

        if start is not None and start < 0:
            raise ValueError(f'The evaluation start epoch {start} is smaller '
                             f'than 0')

        self.dataloader = dataloader
        self.interval = interval
        self.start = start
        self.by_epoch = by_epoch

        assert isinstance(save_best, str) or save_best is None, \
            '"save_best" should be a str or None ' \
            f'rather than {type(save_best)}'
        self.save_best = save_best
        self.eval_kwargs = eval_kwargs
        self.initial_flag = True

        if test_fn is None:
            from annotator.uniformer.mmcv.engine import single_gpu_test
            self.test_fn = single_gpu_test
        else:
            self.test_fn = test_fn

        if greater_keys is None:
            self.greater_keys = self._default_greater_keys
        else:
            if not isinstance(greater_keys, (list, tuple)):
                greater_keys = (greater_keys, )
            assert is_seq_of(greater_keys, str)
            self.greater_keys = greater_keys

        if less_keys is None:
            self.less_keys = self._default_less_keys
        else:
            if not isinstance(less_keys, (list, tuple)):
                less_keys = (less_keys, )
            assert is_seq_of(less_keys, str)
            self.less_keys = less_keys

        if self.save_best is not None:
            self.best_ckpt_path = None
            self._init_rule(rule, self.save_best)

        self.out_dir = out_dir
        self.file_client_args = file_client_args

    def _init_rule(self, rule, key_indicator):
        """Initialize rule, key_indicator, comparison_func, and best score.

        Here is the rule to determine which comparison rule is used for the
        key indicator when the rule is not specified (note that the key
        indicator matching is case-insensitive):
        1. If the key indicator is in ``self.greater_keys``, the rule will be
           specified as 'greater'.
        2. Or if the key indicator is in ``self.less_keys``, the rule will be
           specified as 'less'.
        3. Or if any item in ``self.greater_keys`` is a substring of the key
           indicator, the rule will be specified as 'greater'.
        4. Or if any item in ``self.less_keys`` is a substring of the key
           indicator, the rule will be specified as 'less'.

        Args:
            rule (str | None): Comparison rule for best score.
            key_indicator (str | None): Key indicator to determine the
                comparison rule.
        """
        if rule not in self.rule_map and rule is not None:
            raise KeyError(f'rule must be greater, less or None, '
                           f'but got {rule}.')

        if rule is None:
            if key_indicator != 'auto':
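                # Match in lower case so that comparison against the
                # greater/less key lists is case-insensitive.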
                key_indicator_lc = key_indicator.lower()
                greater_keys = [key.lower() for key in self.greater_keys]
                less_keys = [key.lower() for key in self.less_keys]

                if key_indicator_lc in greater_keys:
                    rule = 'greater'
                elif key_indicator_lc in less_keys:
                    rule = 'less'
                elif any(key in key_indicator_lc for key in greater_keys):
                    rule = 'greater'
                elif any(key in key_indicator_lc for key in less_keys):
                    rule = 'less'
                else:
                    raise ValueError(f'Cannot infer the rule for key '
                                     f'{key_indicator}, thus a specific rule '
                                     f'must be specified.')
        self.rule = rule
        self.key_indicator = key_indicator
        if self.rule is not None:
            self.compare_func = self.rule_map[self.rule]

    def before_run(self, runner):
        if not self.out_dir:
            self.out_dir = runner.work_dir

        self.file_client = FileClient.infer_client(self.file_client_args,
                                                   self.out_dir)
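
        # If `self.out_dir` differs from `runner.work_dir`, an explicit
        # `out_dir` was given, so the final `self.out_dir` becomes the
        # concatenation of `out_dir` and the last-level directory of
        # `runner.work_dir`.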
        if self.out_dir != runner.work_dir:
            basename = osp.basename(runner.work_dir.rstrip(osp.sep))
            self.out_dir = self.file_client.join_path(self.out_dir, basename)
            runner.logger.info(
                (f'The best checkpoint will be saved to {self.out_dir} by '
                 f'{self.file_client.name}'))

        if self.save_best is not None:
            if runner.meta is None:
                warnings.warn('runner.meta is None. Creating an empty one.')
                runner.meta = dict()
            runner.meta.setdefault('hook_msgs', dict())
            self.best_ckpt_path = runner.meta['hook_msgs'].get(
                'best_ckpt', None)

    def before_train_iter(self, runner):
        """Evaluate the model only at the start of training by iteration."""
        if self.by_epoch or not self.initial_flag:
            return
        if self.start is not None and runner.iter >= self.start:
            self.after_train_iter(runner)
        self.initial_flag = False

    def before_train_epoch(self, runner):
        """Evaluate the model only at the start of training by epoch."""
        if not (self.by_epoch and self.initial_flag):
            return
        if self.start is not None and runner.epoch >= self.start:
            self.after_train_epoch(runner)
        self.initial_flag = False

    def after_train_iter(self, runner):
        """Called after every training iter to evaluate the results."""
        if not self.by_epoch and self._should_evaluate(runner):
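            # Flush the pending training log through the LoggerHook(s) and
            # clear the log buffer, so the training log and the evaluation
            # log are not mixed for iteration-based training.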
            for hook in runner._hooks:
                if isinstance(hook, LoggerHook):
                    hook.after_train_iter(runner)
            runner.log_buffer.clear()

            self._do_evaluate(runner)

    def after_train_epoch(self, runner):
        """Called after every training epoch to evaluate the results."""
        if self.by_epoch and self._should_evaluate(runner):
            self._do_evaluate(runner)

    def _do_evaluate(self, runner):
        """Perform evaluation and save the checkpoint."""
        results = self.test_fn(runner.model, self.dataloader)
        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
        key_score = self.evaluate(runner, results)
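        # `key_score` may be None (e.g. when `save_best` is not set or the
        # evaluation results are empty), in which case saving the best
        # checkpoint is skipped.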
        if self.save_best and key_score:
            self._save_ckpt(runner, key_score)

    def _should_evaluate(self, runner):
        """Judge whether to perform evaluation.

        Here are the rules to judge whether to perform evaluation:
        1. It will not perform evaluation during the epoch/iteration
           interval, which is determined by ``self.interval``.
        2. It will not perform evaluation if the start time is larger than
           the current time.
        3. It will not perform evaluation when the current time is larger
           than the start time but within the epoch/iteration interval.

        Returns:
            bool: The flag indicating whether to perform evaluation.
        """
        if self.by_epoch:
            current = runner.epoch
            check_time = self.every_n_epochs
        else:
            current = runner.iter
            check_time = self.every_n_iters

        if self.start is None:
            if not check_time(runner, self.interval):
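                # No evaluation during the interval.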
                return False
        elif (current + 1) < self.start:
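            # No evaluation before `self.start` is reached.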
            return False
        else:
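            # After `self.start`, evaluate only every `self.interval`
            # epochs/iterations, e.g. at 3, 5, 7, ... for start=3 and
            # interval=2.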
            if (current + 1 - self.start) % self.interval:
                return False
        return True

    def _save_ckpt(self, runner, key_score):
        """Save the best checkpoint.

        It will compare the score according to the compare function, write
        related information (best score, best checkpoint path) and save the
        best checkpoint into ``work_dir``.
        """
        if self.by_epoch:
            current = f'epoch_{runner.epoch + 1}'
            cur_type, cur_time = 'epoch', runner.epoch + 1
        else:
            current = f'iter_{runner.iter + 1}'
            cur_type, cur_time = 'iter', runner.iter + 1

        best_score = runner.meta['hook_msgs'].get(
            'best_score', self.init_value_map[self.rule])
        if self.compare_func(key_score, best_score):
            best_score = key_score
            runner.meta['hook_msgs']['best_score'] = best_score

            if self.best_ckpt_path and self.file_client.isfile(
                    self.best_ckpt_path):
                self.file_client.remove(self.best_ckpt_path)
                runner.logger.info(
                    (f'The previous best checkpoint {self.best_ckpt_path} was '
                     'removed'))

            best_ckpt_name = f'best_{self.key_indicator}_{current}.pth'
            self.best_ckpt_path = self.file_client.join_path(
                self.out_dir, best_ckpt_name)
            runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path

            runner.save_checkpoint(
                self.out_dir, best_ckpt_name, create_symlink=False)
            runner.logger.info(
                f'Now best checkpoint is saved as {best_ckpt_name}.')
            runner.logger.info(
                f'Best {self.key_indicator} is {best_score:0.4f} '
                f'at {cur_time} {cur_type}.')

    def evaluate(self, runner, results):
        """Evaluate the results.

        Args:
            runner (:obj:`mmcv.Runner`): The underlying training runner.
            results (list): Output results.
        """
        eval_res = self.dataloader.dataset.evaluate(
            results, logger=runner.logger, **self.eval_kwargs)

        for name, val in eval_res.items():
            runner.log_buffer.output[name] = val
        runner.log_buffer.ready = True

        if self.save_best is not None:
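            # If the model performs poorly, `eval_res` may be an empty dict,
            # which would make the key lookup below fail; skip saving the
            # best checkpoint in that case.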
            if not eval_res:
                warnings.warn(
                    'Since `eval_res` is an empty dict, the behavior to save '
                    'the best checkpoint will be skipped in this evaluation.')
                return None

            if self.key_indicator == 'auto':
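                # Infer the key indicator and rule from the first metric in
                # the evaluation results.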
                self._init_rule(self.rule, list(eval_res.keys())[0])
            return eval_res[self.key_indicator]

        return None


class DistEvalHook(EvalHook):
    """Distributed evaluation hook.

    This hook will regularly perform evaluation at a given interval when
    running in a distributed environment.

    Args:
        dataloader (DataLoader): A PyTorch dataloader, whose dataset has
            implemented the ``evaluate`` function.
        start (int | None, optional): Evaluation starting epoch. It enables
            evaluation before the training starts if ``start`` <= the resuming
            epoch. If None, whether to evaluate is merely decided by
            ``interval``. Default: None.
        interval (int): Evaluation interval. Default: 1.
        by_epoch (bool): Determines whether to perform evaluation by epoch or
            by iteration. If set to True, it will perform evaluation by epoch;
            otherwise, by iteration. Default: True.
        save_best (str, optional): If a metric is specified, it would measure
            the best checkpoint during evaluation. The information about the
            best checkpoint would be saved in ``runner.meta['hook_msgs']`` to
            keep the best score value and best checkpoint path, which will
            also be loaded when resuming a checkpoint. Options are the
            evaluation metrics on the test dataset, e.g., ``bbox_mAP`` and
            ``segm_mAP`` for bbox detection and instance segmentation, or
            ``AR@100`` for proposal recall. If ``save_best`` is ``auto``, the
            first key of the returned ``OrderedDict`` result will be used.
            Default: None.
        rule (str | None, optional): Comparison rule for best score. If set to
            None, it will infer a reasonable rule. Keys such as 'acc', 'top',
            etc. will be inferred by the 'greater' rule. Keys containing
            'loss' will be inferred by the 'less' rule. Options are 'greater',
            'less', None. Default: None.
        test_fn (callable, optional): Tests a model with samples from a
            dataloader in a multi-gpu manner and returns the test results. If
            ``None``, the default test function ``mmcv.engine.multi_gpu_test``
            will be used. (default: ``None``)
        tmpdir (str | None): Temporary directory to save the results of all
            processes. Default: None.
        gpu_collect (bool): Whether to use gpu or cpu to collect results.
            Default: False.
        broadcast_bn_buffer (bool): Whether to broadcast the buffer
            (running_mean and running_var) of rank 0 to the other ranks
            before evaluation. Default: True.
        out_dir (str, optional): The root directory to save checkpoints. If
            not specified, ``runner.work_dir`` will be used by default. If
            specified, the ``out_dir`` will be the concatenation of
            ``out_dir`` and the last level directory of ``runner.work_dir``.
        file_client_args (dict): Arguments to instantiate a FileClient.
            See :class:`mmcv.fileio.FileClient` for details. Default: None.
        **eval_kwargs: Evaluation arguments fed into the evaluate function of
            the dataset.
    """

    def __init__(self,
                 dataloader,
                 start=None,
                 interval=1,
                 by_epoch=True,
                 save_best=None,
                 rule=None,
                 test_fn=None,
                 greater_keys=None,
                 less_keys=None,
                 broadcast_bn_buffer=True,
                 tmpdir=None,
                 gpu_collect=False,
                 out_dir=None,
                 file_client_args=None,
                 **eval_kwargs):

        if test_fn is None:
            from annotator.uniformer.mmcv.engine import multi_gpu_test
            test_fn = multi_gpu_test

        super().__init__(
            dataloader,
            start=start,
            interval=interval,
            by_epoch=by_epoch,
            save_best=save_best,
            rule=rule,
            test_fn=test_fn,
            greater_keys=greater_keys,
            less_keys=less_keys,
            out_dir=out_dir,
            file_client_args=file_client_args,
            **eval_kwargs)

        self.broadcast_bn_buffer = broadcast_bn_buffer
        self.tmpdir = tmpdir
        self.gpu_collect = gpu_collect

    def _do_evaluate(self, runner):
        """Perform evaluation and save the checkpoint."""
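        # PyTorch DDP does not synchronize BatchNorm buffers (running_mean
        # and running_var), which can make the evaluated model inconsistent
        # across ranks. Broadcasting rank 0's buffers keeps all ranks
        # consistent before evaluation.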
        if self.broadcast_bn_buffer:
            model = runner.model
            for name, module in model.named_modules():
                if isinstance(module,
                              _BatchNorm) and module.track_running_stats:
                    dist.broadcast(module.running_var, 0)
                    dist.broadcast(module.running_mean, 0)

        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, '.eval_hook')

        results = self.test_fn(
            runner.model,
            self.dataloader,
            tmpdir=tmpdir,
            gpu_collect=self.gpu_collect)
        if runner.rank == 0:
            print('\n')
            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
            key_score = self.evaluate(runner, results)
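            # As in `EvalHook._do_evaluate`, skip saving when `key_score`
            # is None.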
            if self.save_best and key_score:
                self._save_ckpt(runner, key_score)