diff --git a/src2/__init__.py b/src2/__init__.py
deleted file mode 100644
index 6cb1033d22edfa3c5dcb167c5ee21fd4ed99523f..0000000000000000000000000000000000000000
--- a/src2/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-
-from . import data
-from . import nn
-from . import optim
-from . import zoo
diff --git a/src2/__pycache__/__init__.cpython-310.pyc b/src2/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 53a43d914de1be5f40ef8ae2d6ecf4c641c4bf0f..0000000000000000000000000000000000000000
Binary files a/src2/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/src2/core/__init__.py b/src2/core/__init__.py
deleted file mode 100644
index 35c455c63d4fbb2bbf85a83bc3cadec9913335a8..0000000000000000000000000000000000000000
--- a/src2/core/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""by lyuwenyu
-"""
-
-# from .yaml_utils import register, create, load_config, merge_config, merge_dict
-from .yaml_utils import *
-from .config import BaseConfig
-from .yaml_config import YAMLConfig
diff --git a/src2/core/__pycache__/__init__.cpython-310.pyc b/src2/core/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 421eea2819987662712b8e4c2be66c9011c5bf76..0000000000000000000000000000000000000000
Binary files a/src2/core/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/src2/core/__pycache__/config.cpython-310.pyc b/src2/core/__pycache__/config.cpython-310.pyc
deleted file mode 100644
index 682320aaef33dca015a95e529f1c95f9ab84468a..0000000000000000000000000000000000000000
Binary files a/src2/core/__pycache__/config.cpython-310.pyc and /dev/null differ
diff --git a/src2/core/__pycache__/yaml_config.cpython-310.pyc b/src2/core/__pycache__/yaml_config.cpython-310.pyc
deleted file mode 100644
index 20b460caa67bd38069fed297121aa346aed910f0..0000000000000000000000000000000000000000
Binary files a/src2/core/__pycache__/yaml_config.cpython-310.pyc and /dev/null differ
diff --git a/src2/core/__pycache__/yaml_utils.cpython-310.pyc b/src2/core/__pycache__/yaml_utils.cpython-310.pyc
deleted file mode 100644
index 6be3540a559cba8a35ca7ca575642e4a56b3f8cf..0000000000000000000000000000000000000000
Binary files a/src2/core/__pycache__/yaml_utils.cpython-310.pyc and /dev/null differ
diff --git a/src2/core/config.py b/src2/core/config.py
deleted file mode 100644
index cf803ef56702c00091fa6aa009e0c1367b992e44..0000000000000000000000000000000000000000
--- a/src2/core/config.py
+++ /dev/null
@@ -1,264 +0,0 @@
-"""by lyuwenyu
-"""
-
-from pprint import pprint
-import torch
-import torch.nn as nn
-from torch.utils.data import Dataset, DataLoader
-from torch.optim import Optimizer
-from torch.optim.lr_scheduler import LRScheduler
-from torch.cuda.amp.grad_scaler import GradScaler
-
-from typing import Callable, List, Dict
-
-
-__all__ = ['BaseConfig', ]
-
-
-class BaseConfig(object):
-    # TODO property
-
-
-    def __init__(self) -> None:
-        super().__init__()
-
-        self.task :str = None
-
-        self._model :nn.Module = None
-        self._postprocessor :nn.Module = None
-        self._criterion :nn.Module = None
-        self._optimizer :Optimizer = None
-        self._lr_scheduler :LRScheduler = None
-        self._train_dataloader :DataLoader = None
-        self._val_dataloader :DataLoader = None
-        self._ema :nn.Module = None
-        self._scaler :GradScaler = None
-
-        self.train_dataset :Dataset = None
-        self.val_dataset :Dataset = None
-        self.num_workers :int = 0
-        self.collate_fn :Callable = None
-
-        self.batch_size :int = None
-        self._train_batch_size :int = None
-        self._val_batch_size :int = None
-        self._train_shuffle: bool = None
-        self._val_shuffle: bool = None
-
-        self.evaluator :Callable[[nn.Module, DataLoader, str], ] = None
-
-        # runtime
-        self.resume :str = None
-        self.tuning :str = None
-
-        self.epoches :int = None
-        self.last_epoch :int = -1
-        self.end_epoch :int = None
-
-        self.use_amp :bool = False
-        self.use_ema :bool = False
-        self.sync_bn :bool = False
-        self.clip_max_norm : float = None
-        self.find_unused_parameters :bool = None
-        # self.ema_decay: float = 0.9999
-        # self.grad_clip_: Callable = None
-
-        self.log_dir :str = './logs/'
-        self.log_step :int = 10
-        self._output_dir :str = None
-        self._print_freq :int = None
-        self.checkpoint_step :int = 1
-
-        # self.device :str = torch.device('cpu')
-        device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        self.device = torch.device(device)
-
-
-    @property
-    def model(self, ) -> nn.Module:
-        return self._model
-
-    @model.setter
-    def model(self, m):
-        assert isinstance(m, nn.Module), f'{type(m)} != nn.Module, please check your model class'
-        self._model = m
-
-    @property
-    def postprocessor(self, ) -> nn.Module:
-        return self._postprocessor
-
-    @postprocessor.setter
-    def postprocessor(self, m):
-        assert isinstance(m, nn.Module), f'{type(m)} != nn.Module, please check your model class'
-        self._postprocessor = m
-
-    @property
-    def criterion(self, ) -> nn.Module:
-        return self._criterion
-
-    @criterion.setter
-    def criterion(self, m):
-        assert isinstance(m, nn.Module), f'{type(m)} != nn.Module, please check your model class'
-        self._criterion = m
-
-    @property
-    def optimizer(self, ) -> Optimizer:
-        return self._optimizer
-
-    @optimizer.setter
-    def optimizer(self, m):
-        assert isinstance(m, Optimizer), f'{type(m)} != optim.Optimizer, please check your model class'
-        self._optimizer = m
-
-    @property
-    def lr_scheduler(self, ) -> LRScheduler:
-        return self._lr_scheduler
-
-    @lr_scheduler.setter
-    def lr_scheduler(self, m):
-        assert isinstance(m, LRScheduler), f'{type(m)} != LRScheduler, please check your model class'
-        self._lr_scheduler = m
-
-
-    @property
-    def train_dataloader(self):
-        if self._train_dataloader is None and self.train_dataset is not None:
-            loader = DataLoader(self.train_dataset,
-                                batch_size=self.train_batch_size,
-                                num_workers=self.num_workers,
-                                collate_fn=self.collate_fn,
-                                shuffle=self.train_shuffle, )
-            loader.shuffle = self.train_shuffle
-            self._train_dataloader = loader
-
-        return self._train_dataloader
-
-    @train_dataloader.setter
-    def train_dataloader(self, loader):
-        self._train_dataloader = loader
-
-    @property
-    def val_dataloader(self):
-        if self._val_dataloader is None and self.val_dataset is not None:
-            loader = DataLoader(self.val_dataset,
-                                batch_size=self.val_batch_size,
-                                num_workers=self.num_workers,
-                                drop_last=False,
-                                collate_fn=self.collate_fn,
-                                shuffle=self.val_shuffle)
-            loader.shuffle = self.val_shuffle
-            self._val_dataloader = loader
-
-        return self._val_dataloader
-
-    @val_dataloader.setter
-    def val_dataloader(self, loader):
-        self._val_dataloader = loader
-
-
-    # TODO method
-    # @property
-    # def ema(self, ) -> nn.Module:
-    #     if self._ema is None and self.use_ema and self.model is not None:
-    #         self._ema = ModelEMA(self.model, self.ema_decay)
-    #     return self._ema
-
-    @property
-    def ema(self, ) -> nn.Module:
-        return self._ema
-
-    @ema.setter
-    def ema(self, obj):
-        self._ema = obj
-
-
-    @property
-    def scaler(self) -> GradScaler:
-        if self._scaler is None and self.use_amp and torch.cuda.is_available():
-            self._scaler = GradScaler()
-        return self._scaler
-
-    @scaler.setter
-    def scaler(self, obj: GradScaler):
-        self._scaler = obj
-
-
-    @property
-    def val_shuffle(self):
-        if self._val_shuffle is None:
-            print('warning: set default val_shuffle=False')
-            return False
-        return self._val_shuffle
-
-    @val_shuffle.setter
-    def val_shuffle(self, shuffle):
-        assert isinstance(shuffle, bool), 'shuffle must be bool'
-        self._val_shuffle = shuffle
-
-    @property
-    def train_shuffle(self):
-        if self._train_shuffle is None:
-            print('warning: set default train_shuffle=True')
-            return True
-        return self._train_shuffle
-
-    @train_shuffle.setter
-    def train_shuffle(self, shuffle):
-        assert isinstance(shuffle, bool), 'shuffle must be bool'
-        self._train_shuffle = shuffle
-
-
-    @property
-    def train_batch_size(self):
-        if self._train_batch_size is None and isinstance(self.batch_size, int):
-            print(f'warning: set train_batch_size=batch_size={self.batch_size}')
-            return self.batch_size
-        return self._train_batch_size
-
-    @train_batch_size.setter
-    def train_batch_size(self, batch_size):
-        assert isinstance(batch_size, int), 'batch_size must be int'
-        self._train_batch_size = batch_size
-
-    @property
-    def val_batch_size(self):
-        if self._val_batch_size is None:
-            print(f'warning: set val_batch_size=batch_size={self.batch_size}')
-            return self.batch_size
-        return self._val_batch_size
-
-    @val_batch_size.setter
-    def val_batch_size(self, batch_size):
-        assert isinstance(batch_size, int), 'batch_size must be int'
-        self._val_batch_size = batch_size
-
-
-    @property
-    def output_dir(self):
-        if self._output_dir is None:
-            return self.log_dir
-        return self._output_dir
-
-    @output_dir.setter
-    def output_dir(self, root):
-        self._output_dir = root
-
-    @property
-    def print_freq(self):
-        if self._print_freq is None:
-            # self._print_freq = self.log_step
-            return self.log_step
-        return self._print_freq
-
-    @print_freq.setter
-    def print_freq(self, n):
-        assert isinstance(n, int), 'print_freq must be int'
-        self._print_freq = n
-
-
-    # def __repr__(self) -> str:
-    #     pass
-
-
diff --git a/src2/core/yaml_config.py b/src2/core/yaml_config.py
deleted file mode 100644
index 6f8f7ef108e48b730bbff18ddcc299a925a8a5bf..0000000000000000000000000000000000000000
--- a/src2/core/yaml_config.py
+++ /dev/null
@@ -1,152 +0,0 @@
-"""by lyuwenyu
-"""
-
-import torch
-import torch.nn as nn
-
-import re
-import copy
-
-from .config import BaseConfig
-from .yaml_utils import load_config, merge_config, create, merge_dict
-
-
-class YAMLConfig(BaseConfig):
-    def __init__(self, cfg_path: str, **kwargs) -> None:
-        super().__init__()
-
-        cfg = load_config(cfg_path)
-        merge_dict(cfg, kwargs)
-
-        # pprint(cfg)
-
-        self.yaml_cfg = cfg
-
-        self.log_step = cfg.get('log_step', 100)
-        self.checkpoint_step = cfg.get('checkpoint_step', 1)
-        self.epoches = cfg.get('epoches', -1)
-        self.resume = cfg.get('resume', '')
-        self.tuning = cfg.get('tuning', '')
-        self.sync_bn = cfg.get('sync_bn', False)
-        self.output_dir = cfg.get('output_dir', None)
-
-        self.use_ema = cfg.get('use_ema', False)
-        self.use_amp = cfg.get('use_amp', False)
-        self.autocast = cfg.get('autocast', dict())
-        self.find_unused_parameters = cfg.get('find_unused_parameters', None)
-        self.clip_max_norm = cfg.get('clip_max_norm', 0.)
-
-
-    @property
-    def model(self, ) -> torch.nn.Module:
-        if self._model is None and 'model' in self.yaml_cfg:
-            merge_config(self.yaml_cfg)
-            self._model = create(self.yaml_cfg['model'])
-        return self._model
-
-    @property
-    def postprocessor(self, ) -> torch.nn.Module:
-        if self._postprocessor is None and 'postprocessor' in self.yaml_cfg:
-            merge_config(self.yaml_cfg)
-            self._postprocessor = create(self.yaml_cfg['postprocessor'])
-        return self._postprocessor
-
-    @property
-    def criterion(self, ):
-        if self._criterion is None and 'criterion' in self.yaml_cfg:
-            merge_config(self.yaml_cfg)
-            self._criterion = create(self.yaml_cfg['criterion'])
-        return self._criterion
-
-
-    @property
-    def optimizer(self, ):
-        if self._optimizer is None and 'optimizer' in self.yaml_cfg:
-            merge_config(self.yaml_cfg)
-            params = self.get_optim_params(self.yaml_cfg['optimizer'], self.model)
-            self._optimizer = create('optimizer', params=params)
-
-        return self._optimizer
-
-    @property
-    def lr_scheduler(self, ):
-        if self._lr_scheduler is None and 'lr_scheduler' in self.yaml_cfg:
-            merge_config(self.yaml_cfg)
-            self._lr_scheduler = create('lr_scheduler', optimizer=self.optimizer)
-            print('Initial lr: ', self._lr_scheduler.get_last_lr())
-
-        return self._lr_scheduler
-
-    @property
-    def train_dataloader(self, ):
-        if self._train_dataloader is None and 'train_dataloader' in self.yaml_cfg:
-            merge_config(self.yaml_cfg)
-            self._train_dataloader = create('train_dataloader')
-            self._train_dataloader.shuffle = self.yaml_cfg['train_dataloader'].get('shuffle', False)
-
-        return self._train_dataloader
-
-    @property
-    def val_dataloader(self, ):
-        if self._val_dataloader is None and 'val_dataloader' in self.yaml_cfg:
-            merge_config(self.yaml_cfg)
-            self._val_dataloader = create('val_dataloader')
-            self._val_dataloader.shuffle = self.yaml_cfg['val_dataloader'].get('shuffle', False)
-
-        return self._val_dataloader
-
-
-    @property
-    def ema(self, ):
-        if self._ema is None and self.yaml_cfg.get('use_ema', False):
-            merge_config(self.yaml_cfg)
-            self._ema = create('ema', model=self.model)
-
-        return self._ema
-
-
-    @property
-    def scaler(self, ):
-        if self._scaler is None and self.yaml_cfg.get('use_amp', False):
-            merge_config(self.yaml_cfg)
-            self._scaler = create('scaler')
-
-        return self._scaler
-
-
-    @staticmethod
-    def get_optim_params(cfg: dict, model: nn.Module):
-        '''
-        E.g.:
-            ^(?=.*a)(?=.*b).*$        means including a and b
-            ^((?!b).)*a((?!b).)*$     means including a but not b
-            ^((?!b|c).)*a((?!b|c).)*$ means including a but not (b | c)
-        '''
-        assert 'type' in cfg, ''
-        cfg = copy.deepcopy(cfg)
-
-        if 'params' not in cfg:
-            return model.parameters()
-
-        assert isinstance(cfg['params'], list), ''
-
-        param_groups = []
-        visited = []
-        for pg in cfg['params']:
-            pattern = pg['params']
-            params = {k: v for k, v in model.named_parameters() if v.requires_grad and len(re.findall(pattern, k)) > 0}
-            pg['params'] = params.values()
-            param_groups.append(pg)
-            visited.extend(list(params.keys()))
-
-        names = [k for k, v in model.named_parameters() if v.requires_grad]
-
-        if len(visited) < len(names):
-            unseen = set(names) - set(visited)
-            params = {k: v for k, v in model.named_parameters() if v.requires_grad and k in unseen}
-            param_groups.append({'params': params.values()})
-            visited.extend(list(params.keys()))
-
-        assert len(visited) == len(names), ''
-
-        return param_groups
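Note: get_optim_params above turns the optimizer YAML into torch param groups by regex-matching parameter names. A minimal, self-contained sketch of just the matching rule (the model and pattern here are invented for illustration, not part of this repo):

    import re
    import torch.nn as nn

    # hypothetical two-part model standing in for a real detector
    model = nn.ModuleDict({'backbone': nn.Linear(4, 4), 'head': nn.Linear(4, 2)})

    # the same membership test get_optim_params applies per param group
    pattern = 'backbone'
    group = {k: v for k, v in model.named_parameters()
             if v.requires_grad and len(re.findall(pattern, k)) > 0}

    print(sorted(group))  # ['backbone.bias', 'backbone.weight']
    # 'head.*' parameters stay unmatched, so get_optim_params would append them
    # as a final catch-all group using the optimizer's default hyperparameters.
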
diff --git a/src2/core/yaml_utils.py b/src2/core/yaml_utils.py
deleted file mode 100644
index c9ed25902cfbec49567dbf36dd99554787cd8b14..0000000000000000000000000000000000000000
--- a/src2/core/yaml_utils.py
+++ /dev/null
@@ -1,208 +0,0 @@
-"""by lyuwenyu
-"""
-
-import os
-import yaml
-import inspect
-import importlib
-
-__all__ = ['GLOBAL_CONFIG', 'register', 'create', 'load_config', 'merge_config', 'merge_dict']
-
-
-GLOBAL_CONFIG = dict()
-INCLUDE_KEY = '__include__'
-
-
-def register(cls: type):
-    '''
-    Args:
-        cls (type): Module class to be registered.
-    '''
-    if cls.__name__ in GLOBAL_CONFIG:
-        raise ValueError('{} already registered'.format(cls.__name__))
-
-    if inspect.isfunction(cls):
-        GLOBAL_CONFIG[cls.__name__] = cls
-
-    elif inspect.isclass(cls):
-        GLOBAL_CONFIG[cls.__name__] = extract_schema(cls)
-
-    else:
-        raise ValueError(f'register {cls}')
-
-    return cls
-
-
-def extract_schema(cls: type):
-    '''
-    Args:
-        cls (type),
-    Return:
-        Dict,
-    '''
-    argspec = inspect.getfullargspec(cls.__init__)
-    arg_names = [arg for arg in argspec.args if arg != 'self']
-    num_defaults = len(argspec.defaults) if argspec.defaults is not None else 0
-    num_requires = len(arg_names) - num_defaults
-
-    schema = dict()
-    schema['_name'] = cls.__name__
-    schema['_pymodule'] = importlib.import_module(cls.__module__)
-    schema['_inject'] = getattr(cls, '__inject__', [])
-    schema['_share'] = getattr(cls, '__share__', [])
-
-    for i, name in enumerate(arg_names):
-        if name in schema['_share']:
-            assert i >= num_requires, 'share config must have default value.'
-            value = argspec.defaults[i - num_requires]
-
-        elif i >= num_requires:
-            value = argspec.defaults[i - num_requires]
-
-        else:
-            value = None
-
-        schema[name] = value
-
-    return schema
-
-
-def create(type_or_name, **kwargs):
-    '''
-    '''
-    assert type(type_or_name) in (type, str), 'create should be class or name.'
-
-    name = type_or_name if isinstance(type_or_name, str) else type_or_name.__name__
-
-    if name in GLOBAL_CONFIG:
-        if hasattr(GLOBAL_CONFIG[name], '__dict__'):
-            return GLOBAL_CONFIG[name]
-    else:
-        raise ValueError('The module {} is not registered'.format(name))
-
-    cfg = GLOBAL_CONFIG[name]
-
-    if isinstance(cfg, dict) and 'type' in cfg:
-        _cfg: dict = GLOBAL_CONFIG[cfg['type']]
-        _cfg.update(cfg)  # update global cls default args
-        _cfg.update(kwargs)  # TODO
-        name = _cfg.pop('type')
-
-        return create(name)
-
-
-    cls = getattr(cfg['_pymodule'], name)
-    argspec = inspect.getfullargspec(cls.__init__)
-    arg_names = [arg for arg in argspec.args if arg != 'self']
-
-    cls_kwargs = {}
-    cls_kwargs.update(cfg)
-
-    # shared var
-    for k in cfg['_share']:
-        if k in GLOBAL_CONFIG:
-            cls_kwargs[k] = GLOBAL_CONFIG[k]
-        else:
-            cls_kwargs[k] = cfg[k]
-
-    # inject
-    for k in cfg['_inject']:
-        _k = cfg[k]
-
-        if _k is None:
-            continue
-
-        if isinstance(_k, str):
-            if _k not in GLOBAL_CONFIG:
-                raise ValueError(f'Missing inject config of {_k}.')
-
-            _cfg = GLOBAL_CONFIG[_k]
-
-            if isinstance(_cfg, dict):
-                cls_kwargs[k] = create(_cfg['_name'])
-            else:
-                cls_kwargs[k] = _cfg
-
-        elif isinstance(_k, dict):
-            if 'type' not in _k.keys():
-                raise ValueError(f'Missing inject for `type` style.')
-
-            _type = str(_k['type'])
-            if _type not in GLOBAL_CONFIG:
-                raise ValueError(f'Missing {_type} in inspect stage.')
-
-            # TODO modified inspace, maybe get wrong result for using `> 1`
-            _cfg: dict = GLOBAL_CONFIG[_type]
-            # _cfg_copy = copy.deepcopy(_cfg)
-            _cfg.update(_k)  # update
-            cls_kwargs[k] = create(_type)
-            # _cfg.update(_cfg_copy)  # resume
-
-        else:
-            raise ValueError(f'Inject does not support {_k}')
-
-
-    cls_kwargs = {n: cls_kwargs[n] for n in arg_names}
-
-    return cls(**cls_kwargs)
-
-
-def load_config(file_path, cfg=dict()):
-    '''load config
-    '''
-    _, ext = os.path.splitext(file_path)
-    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
-
-    with open(file_path) as f:
-        file_cfg = yaml.load(f, Loader=yaml.Loader)
-        if file_cfg is None:
-            return {}
-
-    if INCLUDE_KEY in file_cfg:
-        base_yamls = list(file_cfg[INCLUDE_KEY])
-        for base_yaml in base_yamls:
-            if base_yaml.startswith('~'):
-                base_yaml = os.path.expanduser(base_yaml)
-
-            if not base_yaml.startswith('/'):
-                base_yaml = os.path.join(os.path.dirname(file_path), base_yaml)
-
-            with open(base_yaml) as f:
-                base_cfg = load_config(base_yaml, cfg)
-                merge_config(base_cfg, cfg)
-
-    return merge_config(file_cfg, cfg)
-
-
-def merge_dict(dct, another_dct):
-    '''merge another_dct into dct
-    '''
-    for k in another_dct:
-        if (k in dct and isinstance(dct[k], dict) and isinstance(another_dct[k], dict)):
-            merge_dict(dct[k], another_dct[k])
-        else:
-            dct[k] = another_dct[k]
-
-    return dct
-
-
-def merge_config(config, another_cfg=None):
-    """
-    Merge config into global config or another_cfg.
-
-    Args:
-        config (dict): Config to be merged.
-
-    Returns: global config
-    """
-    global GLOBAL_CONFIG
-    dct = GLOBAL_CONFIG if another_cfg is None else another_cfg
-
-    return merge_dict(dct, config)
-
-
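Note: register/create/merge above form a small dependency-injection system: register snapshots a class's __init__ signature into GLOBAL_CONFIG, merge_config overlays YAML values onto that schema, and create instantiates from the merged result. A rough sketch of the flow (FakeHead and the num_classes override are hypothetical; the import refers to this module as it existed before the deletion):

    from src2.core.yaml_utils import register, create, GLOBAL_CONFIG

    @register                  # stores the schema {'_name': 'FakeHead', 'num_classes': 80, ...}
    class FakeHead:
        def __init__(self, num_classes=80):
            self.num_classes = num_classes

    # merge_config would normally copy YAML values into the schema; done by hand here
    GLOBAL_CONFIG['FakeHead']['num_classes'] = 91

    head = create('FakeHead')  # builds FakeHead(num_classes=91) from the schema
    print(head.num_classes)    # 91
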
diff --git a/src2/data/__init__.py b/src2/data/__init__.py
deleted file mode 100644
index 95715f8a76937758b2c5ec9d121fc069fddcbabb..0000000000000000000000000000000000000000
--- a/src2/data/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-
-from .coco import *
-from .cifar10 import CIFAR10
-
-from .dataloader import *
-from .transforms import *
-
diff --git a/src2/data/__pycache__/__init__.cpython-310.pyc b/src2/data/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 728b72a8f227c671ddcb702953e6919a8df26edb..0000000000000000000000000000000000000000
Binary files a/src2/data/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/src2/data/__pycache__/dataloader.cpython-310.pyc b/src2/data/__pycache__/dataloader.cpython-310.pyc
deleted file mode 100644
index fc43a3505bcb61a2ba33772cf2659e8c7fd108f6..0000000000000000000000000000000000000000
Binary files a/src2/data/__pycache__/dataloader.cpython-310.pyc and /dev/null differ
diff --git a/src2/data/__pycache__/transforms.cpython-310.pyc b/src2/data/__pycache__/transforms.cpython-310.pyc
deleted file mode 100644
index 3f0ded5607da300be686c025c918cd5f74e60ab9..0000000000000000000000000000000000000000
Binary files a/src2/data/__pycache__/transforms.cpython-310.pyc and /dev/null differ
diff --git a/src2/data/cifar10/__init__.py b/src2/data/cifar10/__init__.py
deleted file mode 100644
index e5267dccd21c6c4371c14ff0a04b064f608dfc14..0000000000000000000000000000000000000000
--- a/src2/data/cifar10/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-
-import torchvision
-from typing import Optional, Callable
-
-from src.core import register
-
-
-@register
-class CIFAR10(torchvision.datasets.CIFAR10):
-    __inject__ = ['transform', 'target_transform']
-
-    def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None:
-        super().__init__(root, train, transform, target_transform, download)
-
diff --git a/src2/data/cifar10/__pycache__/__init__.cpython-310.pyc b/src2/data/cifar10/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index bc91d6c52e59340ccace02e97c6671d55112270c..0000000000000000000000000000000000000000
Binary files a/src2/data/cifar10/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/src2/data/coco/__init__.py b/src2/data/coco/__init__.py
deleted file mode 100644
index c83b002187885f1571556b16e1c3632f03d68a0a..0000000000000000000000000000000000000000
--- a/src2/data/coco/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from .coco_dataset import (
-    CocoDetection,
-    mscoco_category2label,
-    mscoco_label2category,
-    mscoco_category2name,
-)
-from .coco_eval import *
-
-from .coco_utils import get_coco_api_from_dataset
\ No newline at end of file
diff --git a/src2/data/coco/__pycache__/__init__.cpython-310.pyc b/src2/data/coco/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 59afeb4587966d6aad4c2ae35c2c3685de291606..0000000000000000000000000000000000000000
Binary files a/src2/data/coco/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/src2/data/coco/__pycache__/coco_dataset.cpython-310.pyc b/src2/data/coco/__pycache__/coco_dataset.cpython-310.pyc
deleted file mode 100644
index d629862dfd2b09122141c3e08b8880e12de3cc0c..0000000000000000000000000000000000000000
Binary files a/src2/data/coco/__pycache__/coco_dataset.cpython-310.pyc and /dev/null differ
diff --git a/src2/data/coco/__pycache__/coco_eval.cpython-310.pyc b/src2/data/coco/__pycache__/coco_eval.cpython-310.pyc
deleted file mode 100644
index 4440587b2f0a6dee0dbe69ca178e87b8376520ce..0000000000000000000000000000000000000000
Binary files a/src2/data/coco/__pycache__/coco_eval.cpython-310.pyc and /dev/null differ
diff --git a/src2/data/coco/__pycache__/coco_utils.cpython-310.pyc b/src2/data/coco/__pycache__/coco_utils.cpython-310.pyc
deleted file mode 100644
index b56f5294a0d8a67969e06c1c27417e30ba043624..0000000000000000000000000000000000000000
Binary files a/src2/data/coco/__pycache__/coco_utils.cpython-310.pyc and /dev/null differ
diff --git a/src2/data/coco/coco_dataset.py b/src2/data/coco/coco_dataset.py
deleted file mode 100644
index 0ef78498d753a76538651148f0692b1515173149..0000000000000000000000000000000000000000
--- a/src2/data/coco/coco_dataset.py
+++ /dev/null
@@ -1,238 +0,0 @@
-"""
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-
-COCO dataset which returns image_id for evaluation.
-Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
-"""
-
-import torch
-import torch.utils.data
-
-import torchvision
-torchvision.disable_beta_transforms_warning()
-
-from torchvision import datapoints
-
-from pycocotools import mask as coco_mask
-
-from src.core import register
-
-__all__ = ['CocoDetection']
-
-
-@register
-class CocoDetection(torchvision.datasets.CocoDetection):
-    __inject__ = ['transforms']
-    __share__ = ['remap_mscoco_category']
-
-    def __init__(self, img_folder, ann_file, transforms, return_masks, remap_mscoco_category=False):
-        super(CocoDetection, self).__init__(img_folder, ann_file)
-        self._transforms = transforms
-        self.prepare = ConvertCocoPolysToMask(return_masks, remap_mscoco_category)
-        self.img_folder = img_folder
-        self.ann_file = ann_file
-        self.return_masks = return_masks
-        self.remap_mscoco_category = remap_mscoco_category
-
-    def __getitem__(self, idx):
-        img, target = super(CocoDetection, self).__getitem__(idx)
-        image_id = self.ids[idx]
-        target = {'image_id': image_id, 'annotations': target}
-        img, target = self.prepare(img, target)
-
-        # ['boxes', 'masks', 'labels']:
-        if 'boxes' in target:
-            target['boxes'] = datapoints.BoundingBox(
-                target['boxes'],
-                format=datapoints.BoundingBoxFormat.XYXY,
-                spatial_size=img.size[::-1])  # h w
-
-        if 'masks' in target:
-            target['masks'] = datapoints.Mask(target['masks'])
-
-        if self._transforms is not None:
-            img, target = self._transforms(img, target)
-
-        return img, target
-
-    def extra_repr(self) -> str:
-        s = f' img_folder: {self.img_folder}\n ann_file: {self.ann_file}\n'
-        s += f' return_masks: {self.return_masks}\n'
-        if hasattr(self, '_transforms') and self._transforms is not None:
-            s += f' transforms:\n {repr(self._transforms)}'
-
-        return s
-
-
-def convert_coco_poly_to_mask(segmentations, height, width):
-    masks = []
-    for polygons in segmentations:
-        rles = coco_mask.frPyObjects(polygons, height, width)
-        mask = coco_mask.decode(rles)
-        if len(mask.shape) < 3:
-            mask = mask[..., None]
-        mask = torch.as_tensor(mask, dtype=torch.uint8)
-        mask = mask.any(dim=2)
-        masks.append(mask)
-    if masks:
-        masks = torch.stack(masks, dim=0)
-    else:
-        masks = torch.zeros((0, height, width), dtype=torch.uint8)
-    return masks
-
-
-class ConvertCocoPolysToMask(object):
-    def __init__(self, return_masks=False, remap_mscoco_category=False):
-        self.return_masks = return_masks
-        self.remap_mscoco_category = remap_mscoco_category
-
-    def __call__(self, image, target):
-        w, h = image.size
-
-        image_id = target["image_id"]
-        image_id = torch.tensor([image_id])
-
-        anno = target["annotations"]
-
-        anno = [obj for obj in anno if 'iscrowd' not in obj or obj['iscrowd'] == 0]
-
-        boxes = [obj["bbox"] for obj in anno]
-        # guard against no boxes via resizing
-        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
-        boxes[:, 2:] += boxes[:, :2]
-        boxes[:, 0::2].clamp_(min=0, max=w)
-        boxes[:, 1::2].clamp_(min=0, max=h)
-
-        if self.remap_mscoco_category:
-            classes = [mscoco_category2label[obj["category_id"]] for obj in anno]
-        else:
-            classes = [obj["category_id"] for obj in anno]
-
-        classes = torch.tensor(classes, dtype=torch.int64)
-
-        if self.return_masks:
-            segmentations = [obj["segmentation"] for obj in anno]
-            masks = convert_coco_poly_to_mask(segmentations, h, w)
-
-        keypoints = None
-        if anno and "keypoints" in anno[0]:
-            keypoints = [obj["keypoints"] for obj in anno]
-            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
-            num_keypoints = keypoints.shape[0]
-            if num_keypoints:
-                keypoints = keypoints.view(num_keypoints, -1, 3)
-
-        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
-        boxes = boxes[keep]
-        classes = classes[keep]
-        if self.return_masks:
-            masks = masks[keep]
-        if keypoints is not None:
-            keypoints = keypoints[keep]
-
-        target = {}
-        target["boxes"] = boxes
-        target["labels"] = classes
-        if self.return_masks:
-            target["masks"] = masks
-        target["image_id"] = image_id
-        if keypoints is not None:
-            target["keypoints"] = keypoints
-
-        # for conversion to coco api
-        area = torch.tensor([obj["area"] for obj in anno])
-        iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
-        target["area"] = area[keep]
-        target["iscrowd"] = iscrowd[keep]
-
-        target["orig_size"] = torch.as_tensor([int(w), int(h)])
-        target["size"] = torch.as_tensor([int(w), int(h)])
-
-        return image, target
-
-
-mscoco_category2name = {
-    1: 'person',
-    2: 'bicycle',
-    3: 'car',
-    4: 'motorcycle',
-    5: 'airplane',
-    6: 'bus',
-    7: 'train',
-    8: 'truck',
-    9: 'boat',
-    10: 'traffic light',
-    11: 'fire hydrant',
-    13: 'stop sign',
-    14: 'parking meter',
-    15: 'bench',
-    16: 'bird',
-    17: 'cat',
-    18: 'dog',
-    19: 'horse',
-    20: 'sheep',
-    21: 'cow',
-    22: 'elephant',
-    23: 'bear',
-    24: 'zebra',
-    25: 'giraffe',
-    27: 'backpack',
-    28: 'umbrella',
-    31: 'handbag',
-    32: 'tie',
-    33: 'suitcase',
-    34: 'frisbee',
-    35: 'skis',
-    36: 'snowboard',
-    37: 'sports ball',
-    38: 'kite',
-    39: 'baseball bat',
-    40: 'baseball glove',
-    41: 'skateboard',
-    42: 'surfboard',
-    43: 'tennis racket',
-    44: 'bottle',
-    46: 'wine glass',
-    47: 'cup',
-    48: 'fork',
-    49: 'knife',
-    50: 'spoon',
-    51: 'bowl',
-    52: 'banana',
-    53: 'apple',
-    54: 'sandwich',
-    55: 'orange',
-    56: 'broccoli',
-    57: 'carrot',
-    58: 'hot dog',
-    59: 'pizza',
-    60: 'donut',
-    61: 'cake',
-    62: 'chair',
-    63: 'couch',
-    64: 'potted plant',
-    65: 'bed',
-    67: 'dining table',
-    70: 'toilet',
-    72: 'tv',
-    73: 'laptop',
-    74: 'mouse',
-    75: 'remote',
-    76: 'keyboard',
-    77: 'cell phone',
-    78: 'microwave',
-    79: 'oven',
-    80: 'toaster',
-    81: 'sink',
-    82: 'refrigerator',
-    84: 'book',
-    85: 'clock',
-    86: 'vase',
-    87: 'scissors',
-    88: 'teddy bear',
-    89: 'hair drier',
-    90: 'toothbrush'
-}
-
-mscoco_category2label = {k: i for i, k in enumerate(mscoco_category2name.keys())}
-mscoco_label2category = {v: k for k, v in mscoco_category2label.items()}
\ No newline at end of file
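Note: COCO category ids are sparse (12, 26, 29, ... are unused), so remap_mscoco_category=True maps them onto the contiguous 0..79 labels a classification head expects; the tables above define the mapping. The round trip, with values readable off those tables (the import path is the module being deleted here):

    from src2.data.coco.coco_dataset import mscoco_category2label, mscoco_label2category

    print(mscoco_category2label[1])   # 0  -- 'person' becomes class 0
    print(mscoco_category2label[13])  # 11 -- 'stop sign'; id 12 is unused, so ids shift down
    print(mscoco_label2category[11])  # 13 -- inverse map, applied when exporting predictions
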
diff --git a/src2/data/coco/coco_eval.py b/src2/data/coco/coco_eval.py
deleted file mode 100644
index 2d629f5aab011357918ef7303a3dab39e6be4b49..0000000000000000000000000000000000000000
--- a/src2/data/coco/coco_eval.py
+++ /dev/null
@@ -1,269 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-"""
-COCO evaluator that works in distributed mode.
-
-Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
-The difference is that there is less copy-pasting from pycocotools
-in the end of the file, as python3 can suppress prints with contextlib
-"""
-import os
-import contextlib
-import copy
-import numpy as np
-import torch
-
-from pycocotools.cocoeval import COCOeval
-from pycocotools.coco import COCO
-import pycocotools.mask as mask_util
-
-from src.misc import dist
-
-
-__all__ = ['CocoEvaluator',]
-
-
-class CocoEvaluator(object):
-    def __init__(self, coco_gt, iou_types):
-        assert isinstance(iou_types, (list, tuple))
-        coco_gt = copy.deepcopy(coco_gt)
-        self.coco_gt = coco_gt
-
-        self.iou_types = iou_types
-        self.coco_eval = {}
-        for iou_type in iou_types:
-            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
-
-        self.img_ids = []
-        self.eval_imgs = {k: [] for k in iou_types}
-
-    def update(self, predictions):
-        img_ids = list(np.unique(list(predictions.keys())))
-        self.img_ids.extend(img_ids)
-
-        for iou_type in self.iou_types:
-            results = self.prepare(predictions, iou_type)
-
-            # suppress pycocotools prints
-            with open(os.devnull, 'w') as devnull:
-                with contextlib.redirect_stdout(devnull):
-                    coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
-                    coco_eval = self.coco_eval[iou_type]
-
-                    coco_eval.cocoDt = coco_dt
-                    coco_eval.params.imgIds = list(img_ids)
-                    img_ids, eval_imgs = evaluate(coco_eval)
-
-                    self.eval_imgs[iou_type].append(eval_imgs)
-
-    def synchronize_between_processes(self):
-        for iou_type in self.iou_types:
-            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
-            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
-
-    def accumulate(self):
-        for coco_eval in self.coco_eval.values():
-            coco_eval.accumulate()
-
-    def summarize(self):
-        for iou_type, coco_eval in self.coco_eval.items():
-            print("IoU metric: {}".format(iou_type))
-            coco_eval.summarize()
-
-    def prepare(self, predictions, iou_type):
-        if iou_type == "bbox":
-            return self.prepare_for_coco_detection(predictions)
-        elif iou_type == "segm":
-            return self.prepare_for_coco_segmentation(predictions)
-        elif iou_type == "keypoints":
-            return self.prepare_for_coco_keypoint(predictions)
-        else:
-            raise ValueError("Unknown iou type {}".format(iou_type))
-
-    def prepare_for_coco_detection(self, predictions):
-        coco_results = []
-        for original_id, prediction in predictions.items():
-            if len(prediction) == 0:
-                continue
-
-            boxes = prediction["boxes"]
-            boxes = convert_to_xywh(boxes).tolist()
-            scores = prediction["scores"].tolist()
-            labels = prediction["labels"].tolist()
-
-            coco_results.extend(
-                [
-                    {
-                        "image_id": original_id,
-                        "category_id": labels[k],
-                        "bbox": box,
-                        "score": scores[k],
-                    }
-                    for k, box in enumerate(boxes)
-                ]
-            )
-        return coco_results
-
-    def prepare_for_coco_segmentation(self, predictions):
-        coco_results = []
-        for original_id, prediction in predictions.items():
-            if len(prediction) == 0:
-                continue
-
-            scores = prediction["scores"]
-            labels = prediction["labels"]
-            masks = prediction["masks"]
-
-            masks = masks > 0.5
-
-            scores = prediction["scores"].tolist()
-            labels = prediction["labels"].tolist()
-
-            rles = [
-                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
-                for mask in masks
-            ]
-            for rle in rles:
-                rle["counts"] = rle["counts"].decode("utf-8")
-
-            coco_results.extend(
-                [
-                    {
-                        "image_id": original_id,
-                        "category_id": labels[k],
-                        "segmentation": rle,
-                        "score": scores[k],
-                    }
-                    for k, rle in enumerate(rles)
-                ]
-            )
-        return coco_results
-
-    def prepare_for_coco_keypoint(self, predictions):
-        coco_results = []
-        for original_id, prediction in predictions.items():
-            if len(prediction) == 0:
-                continue
-
-            boxes = prediction["boxes"]
-            boxes = convert_to_xywh(boxes).tolist()
-            scores = prediction["scores"].tolist()
-            labels = prediction["labels"].tolist()
-            keypoints = prediction["keypoints"]
-            keypoints = keypoints.flatten(start_dim=1).tolist()
-
-            coco_results.extend(
-                [
-                    {
-                        "image_id": original_id,
-                        "category_id": labels[k],
-                        'keypoints': keypoint,
-                        "score": scores[k],
-                    }
-                    for k, keypoint in enumerate(keypoints)
-                ]
-            )
-        return coco_results
-
-
-def convert_to_xywh(boxes):
-    xmin, ymin, xmax, ymax = boxes.unbind(1)
-    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
-
-
-def merge(img_ids, eval_imgs):
-    all_img_ids = dist.all_gather(img_ids)
-    all_eval_imgs = dist.all_gather(eval_imgs)
-
-    merged_img_ids = []
-    for p in all_img_ids:
-        merged_img_ids.extend(p)
-
-    merged_eval_imgs = []
-    for p in all_eval_imgs:
-        merged_eval_imgs.append(p)
-
-    merged_img_ids = np.array(merged_img_ids)
-    merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
-
-    # keep only unique (and in sorted order) images
-    merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
-    merged_eval_imgs = merged_eval_imgs[..., idx]
-
-    return merged_img_ids, merged_eval_imgs
-
-
-def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
-    img_ids, eval_imgs = merge(img_ids, eval_imgs)
-    img_ids = list(img_ids)
-    eval_imgs = list(eval_imgs.flatten())
-
-    coco_eval.evalImgs = eval_imgs
-    coco_eval.params.imgIds = img_ids
-    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
-
-
-#################################################################
-# From pycocotools, just removed the prints and fixed
-# a Python3 bug about unicode not defined
-#################################################################
-
-
-# import io
-# from contextlib import redirect_stdout
-# def evaluate(imgs):
-#     with redirect_stdout(io.StringIO()):
-#         imgs.evaluate()
-#     return imgs.params.imgIds, np.asarray(imgs.evalImgs).reshape(-1, len(imgs.params.areaRng), len(imgs.params.imgIds))
-
-
-def evaluate(self):
-    '''
-    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
-    :return: None
-    '''
-    # tic = time.time()
-    # print('Running per image evaluation...')
-    p = self.params
-    # add backward compatibility if useSegm is specified in params
-    if p.useSegm is not None:
-        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
-        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
-    # print('Evaluate annotation type *{}*'.format(p.iouType))
-    p.imgIds = list(np.unique(p.imgIds))
-    if p.useCats:
-        p.catIds = list(np.unique(p.catIds))
-    p.maxDets = sorted(p.maxDets)
-    self.params = p
-
-    self._prepare()
-    # loop through images, area range, max detection number
-    catIds = p.catIds if p.useCats else [-1]
-
-    if p.iouType == 'segm' or p.iouType == 'bbox':
-        computeIoU = self.computeIoU
-    elif p.iouType == 'keypoints':
-        computeIoU = self.computeOks
-    self.ious = {
-        (imgId, catId): computeIoU(imgId, catId)
-        for imgId in p.imgIds
-        for catId in catIds}
-
-    evaluateImg = self.evaluateImg
-    maxDet = p.maxDets[-1]
-    evalImgs = [
-        evaluateImg(imgId, catId, areaRng, maxDet)
-        for catId in catIds
-        for areaRng in p.areaRng
-        for imgId in p.imgIds
-    ]
-    # this is NOT in the pycocotools code, but could be done outside
-    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
-    self._paramsEval = copy.deepcopy(self.params)
-    # toc = time.time()
-    # print('DONE (t={:0.2f}s).'.format(toc-tic))
-    return p.imgIds, evalImgs
-
-#################################################################
-# end of straight copy from pycocotools, just removing the prints
-#################################################################
-
diff --git a/src2/data/coco/coco_utils.py b/src2/data/coco/coco_utils.py
deleted file mode 100644
index 48c099474c63e08a30c124fbfc07082edf9feb49..0000000000000000000000000000000000000000
--- a/src2/data/coco/coco_utils.py
+++ /dev/null
@@ -1,184 +0,0 @@
-import os
-
-import torch
-import torch.utils.data
-import torchvision
-from pycocotools import mask as coco_mask
-from pycocotools.coco import COCO
-
-
-def convert_coco_poly_to_mask(segmentations, height, width):
-    masks = []
-    for polygons in segmentations:
-        rles = coco_mask.frPyObjects(polygons, height, width)
-        mask = coco_mask.decode(rles)
-        if len(mask.shape) < 3:
-            mask = mask[..., None]
-        mask = torch.as_tensor(mask, dtype=torch.uint8)
-        mask = mask.any(dim=2)
-        masks.append(mask)
-    if masks:
-        masks = torch.stack(masks, dim=0)
-    else:
-        masks = torch.zeros((0, height, width), dtype=torch.uint8)
-    return masks
-
-
-class ConvertCocoPolysToMask:
-    def __call__(self, image, target):
-        w, h = image.size
-
-        image_id = target["image_id"]
-
-        anno = target["annotations"]
-
-        anno = [obj for obj in anno if obj["iscrowd"] == 0]
-
-        boxes = [obj["bbox"] for obj in anno]
-        # guard against no boxes via resizing
-        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
-        boxes[:, 2:] += boxes[:, :2]
-        boxes[:, 0::2].clamp_(min=0, max=w)
-        boxes[:, 1::2].clamp_(min=0, max=h)
-
-        classes = [obj["category_id"] for obj in anno]
-        classes = torch.tensor(classes, dtype=torch.int64)
-
-        segmentations = [obj["segmentation"] for obj in anno]
-        masks = convert_coco_poly_to_mask(segmentations, h, w)
-
-        keypoints = None
-        if anno and "keypoints" in anno[0]:
-            keypoints = [obj["keypoints"] for obj in anno]
-            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
-            num_keypoints = keypoints.shape[0]
-            if num_keypoints:
-                keypoints = keypoints.view(num_keypoints, -1, 3)
-
-        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
-        boxes = boxes[keep]
-        classes = classes[keep]
-        masks = masks[keep]
-        if keypoints is not None:
-            keypoints = keypoints[keep]
-
-        target = {}
-        target["boxes"] = boxes
-        target["labels"] = classes
-        target["masks"] = masks
-        target["image_id"] = image_id
-        if keypoints is not None:
-            target["keypoints"] = keypoints
-
-        # for conversion to coco api
-        area = torch.tensor([obj["area"] for obj in anno])
-        iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
-        target["area"] = area
-        target["iscrowd"] = iscrowd
-
-        return image, target
-
-
-def _coco_remove_images_without_annotations(dataset, cat_list=None):
-    def _has_only_empty_bbox(anno):
-        return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
-
-    def _count_visible_keypoints(anno):
-        return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
-
-    min_keypoints_per_image = 10
-
-    def _has_valid_annotation(anno):
-        # if it's empty, there is no annotation
-        if len(anno) == 0:
-            return False
-        # if all boxes have close to zero area, there is no annotation
-        if _has_only_empty_bbox(anno):
-            return False
-        # keypoints task have a slight different criteria for considering
-        # if an annotation is valid
-        if "keypoints" not in anno[0]:
-            return True
-        # for keypoint detection tasks, only consider valid images those
-        # containing at least min_keypoints_per_image
-        if _count_visible_keypoints(anno) >= min_keypoints_per_image:
-            return True
-        return False
-
-    ids = []
-    for ds_idx, img_id in enumerate(dataset.ids):
-        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
-        anno = dataset.coco.loadAnns(ann_ids)
-        if cat_list:
-            anno = [obj for obj in anno if obj["category_id"] in cat_list]
-        if _has_valid_annotation(anno):
-            ids.append(ds_idx)
-
-    dataset = torch.utils.data.Subset(dataset, ids)
-    return dataset
-
-
-def convert_to_coco_api(ds):
-    coco_ds = COCO()
-    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
-    ann_id = 1
-    dataset = {"images": [], "categories": [], "annotations": []}
-    categories = set()
-    for img_idx in range(len(ds)):
-        # find better way to get target
-        # targets = ds.get_annotations(img_idx)
-        img, targets = ds[img_idx]
-        image_id = targets["image_id"].item()
-        img_dict = {}
-        img_dict["id"] = image_id
-        img_dict["height"] = img.shape[-2]
-        img_dict["width"] = img.shape[-1]
-        dataset["images"].append(img_dict)
-        bboxes = targets["boxes"].clone()
-        bboxes[:, 2:] -= bboxes[:, :2]
-        bboxes = bboxes.tolist()
-        labels = targets["labels"].tolist()
-        areas = targets["area"].tolist()
-        iscrowd = targets["iscrowd"].tolist()
-        if "masks" in targets:
-            masks = targets["masks"]
-            # make masks Fortran contiguous for coco_mask
-            masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
-        if "keypoints" in targets:
-            keypoints = targets["keypoints"]
-            keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
-        num_objs = len(bboxes)
-        for i in range(num_objs):
-            ann = {}
-            ann["image_id"] = image_id
-            ann["bbox"] = bboxes[i]
-            ann["category_id"] = labels[i]
-            categories.add(labels[i])
-            ann["area"] = areas[i]
-            ann["iscrowd"] = iscrowd[i]
-            ann["id"] = ann_id
-            if "masks" in targets:
-                ann["segmentation"] = coco_mask.encode(masks[i].numpy())
-            if "keypoints" in targets:
-                ann["keypoints"] = keypoints[i]
-                ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3])
-            dataset["annotations"].append(ann)
-            ann_id += 1
-    dataset["categories"] = [{"id": i} for i in sorted(categories)]
-    coco_ds.dataset = dataset
-    coco_ds.createIndex()
-    return coco_ds
-
-
-def get_coco_api_from_dataset(dataset):
-    # FIXME: This is... awful?
-    for _ in range(10):
-        if isinstance(dataset, torchvision.datasets.CocoDetection):
-            break
-        if isinstance(dataset, torch.utils.data.Subset):
-            dataset = dataset.dataset
-    if isinstance(dataset, torchvision.datasets.CocoDetection):
-        return dataset.coco
-    return convert_to_coco_api(dataset)
-
-
diff --git a/src2/data/dataloader.py b/src2/data/dataloader.py
deleted file mode 100644
index 4db7cadf307780d946bd082bf35aba455cac6816..0000000000000000000000000000000000000000
--- a/src2/data/dataloader.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import torch
-import torch.utils.data as data
-
-from src.core import register
-
-
-__all__ = ['DataLoader']
-
-
-@register
-class DataLoader(data.DataLoader):
-    __inject__ = ['dataset', 'collate_fn']
-
-    def __repr__(self) -> str:
-        format_string = self.__class__.__name__ + "("
-        for n in ['dataset', 'batch_size', 'num_workers', 'drop_last', 'collate_fn']:
-            format_string += "\n"
-            format_string += " {0}: {1}".format(n, getattr(self, n))
-        format_string += "\n)"
-        return format_string
-
-
-@register
-def default_collate_fn(items):
-    '''default collate_fn
-    '''
-    return torch.cat([x[0][None] for x in items], dim=0), [x[1] for x in items]
diff --git a/src2/data/functional.py b/src2/data/functional.py
deleted file mode 100644
index 336baa2ee632591a00db3733c389979f3b454348..0000000000000000000000000000000000000000
--- a/src2/data/functional.py
+++ /dev/null
@@ -1,169 +0,0 @@
-import torch
-import torchvision.transforms.functional as F
-
-from packaging import version
-from typing import Optional, List
-from torch import Tensor
-
-# needed due to empty tensor bug in pytorch and torchvision 0.5
-import torchvision
-if version.parse(torchvision.__version__) < version.parse('0.7'):
-    from torchvision.ops import _new_empty_tensor
-    from torchvision.ops.misc import _output_size
-
-
-def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
-    # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
-    """
-    Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
-    This will eventually be supported natively by PyTorch, and this
-    class can go away.
-    """
-    if version.parse(torchvision.__version__) < version.parse('0.7'):
-        if input.numel() > 0:
-            return torch.nn.functional.interpolate(
-                input, size, scale_factor, mode, align_corners
-            )
-
-        output_shape = _output_size(2, input, size, scale_factor)
-        output_shape = list(input.shape[:-2]) + list(output_shape)
-        return _new_empty_tensor(input, output_shape)
-    else:
-        return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners)
-
-
-def crop(image, target, region):
-    cropped_image = F.crop(image, *region)
-
-    target = target.copy()
-    i, j, h, w = region
-
-    # should we do something wrt the original size?
-    target["size"] = torch.tensor([h, w])
-
-    fields = ["labels", "area", "iscrowd"]
-
-    if "boxes" in target:
-        boxes = target["boxes"]
-        max_size = torch.as_tensor([w, h], dtype=torch.float32)
-        cropped_boxes = boxes - torch.as_tensor([j, i, j, i])
-        cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
-        cropped_boxes = cropped_boxes.clamp(min=0)
-        area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
-        target["boxes"] = cropped_boxes.reshape(-1, 4)
-        target["area"] = area
-        fields.append("boxes")
-
-    if "masks" in target:
-        # FIXME should we update the area here if there are no boxes?
-        target['masks'] = target['masks'][:, i:i + h, j:j + w]
-        fields.append("masks")
-
-    # remove elements for which the boxes or masks that have zero area
-    if "boxes" in target or "masks" in target:
-        # favor boxes selection when defining which elements to keep
-        # this is compatible with previous implementation
-        if "boxes" in target:
-            cropped_boxes = target['boxes'].reshape(-1, 2, 2)
-            keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1)
-        else:
-            keep = target['masks'].flatten(1).any(1)
-
-        for field in fields:
-            target[field] = target[field][keep]
-
-    return cropped_image, target
-
-
-def hflip(image, target):
-    flipped_image = F.hflip(image)
-
-    w, h = image.size
-
-    target = target.copy()
-    if "boxes" in target:
-        boxes = target["boxes"]
-        boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1]) + torch.as_tensor([w, 0, w, 0])
-        target["boxes"] = boxes
-
-    if "masks" in target:
-        target['masks'] = target['masks'].flip(-1)
-
-    return flipped_image, target
-
-
-def resize(image, target, size, max_size=None):
-    # size can be min_size (scalar) or (w, h) tuple
-
-    def get_size_with_aspect_ratio(image_size, size, max_size=None):
-        w, h = image_size
-        if max_size is not None:
-            min_original_size = float(min((w, h)))
-            max_original_size = float(max((w, h)))
-            if max_original_size / min_original_size * size > max_size:
-                size = int(round(max_size * min_original_size / max_original_size))
-
-        if (w <= h and w == size) or (h <= w and h == size):
-            return (h, w)
-
-        if w < h:
-            ow = size
-            oh = int(size * h / w)
-        else:
-            oh = size
-            ow = int(size * w / h)
-
-        # r = min(size / min(h, w), max_size / max(h, w))
-        # ow = int(w * r)
-        # oh = int(h * r)
-
-        return (oh, ow)
-
-    def get_size(image_size, size, max_size=None):
-        if isinstance(size, (list, tuple)):
-            return size[::-1]
-        else:
-            return get_size_with_aspect_ratio(image_size, size, max_size)
-
-    size = get_size(image.size, size, max_size)
-    rescaled_image = F.resize(image, size)
-
-    if target is None:
-        return rescaled_image, None
-
-    ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size))
-    ratio_width, ratio_height = ratios
-
-    target = target.copy()
-    if "boxes" in target:
-        boxes = target["boxes"]
-        scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height])
-        target["boxes"] = scaled_boxes
-
-    if "area" in target:
-        area = target["area"]
-        scaled_area = area * (ratio_width * ratio_height)
-        target["area"] = scaled_area
-
-    h, w = size
-    target["size"] = torch.tensor([h, w])
-
-    if "masks" in target:
-        target['masks'] = interpolate(
-            target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5
-
-    return rescaled_image, target
-
-
-def pad(image, target, padding):
-    # assumes that we only pad on the bottom right corners
-    padded_image = F.pad(image, (0, 0, padding[0], padding[1]))
-    if target is None:
-        return padded_image, None
-    target = target.copy()
-    # should we do something wrt the original size?
- target["size"] = torch.tensor(padded_image.size[::-1]) - if "masks" in target: - target['masks'] = torch.nn.functional.pad(target['masks'], (0, padding[0], 0, padding[1])) - return padded_image, target diff --git a/src2/data/transforms.py b/src2/data/transforms.py deleted file mode 100644 index 3fd3945cb5b7444c5e41bbe68f290e2b7e0781ce..0000000000000000000000000000000000000000 --- a/src2/data/transforms.py +++ /dev/null @@ -1,142 +0,0 @@ -""""by lyuwenyu -""" - - -import torch -import torch.nn as nn - -import torchvision -torchvision.disable_beta_transforms_warning() -from torchvision import datapoints - -import torchvision.transforms.v2 as T -import torchvision.transforms.v2.functional as F - -from PIL import Image -from typing import Any, Dict, List, Optional - -from src.core import register, GLOBAL_CONFIG - - -__all__ = ['Compose', ] - - -RandomPhotometricDistort = register(T.RandomPhotometricDistort) -RandomZoomOut = register(T.RandomZoomOut) -# RandomIoUCrop = register(T.RandomIoUCrop) -RandomHorizontalFlip = register(T.RandomHorizontalFlip) -Resize = register(T.Resize) -ToImageTensor = register(T.ToImageTensor) -ConvertDtype = register(T.ConvertDtype) -SanitizeBoundingBox = register(T.SanitizeBoundingBox) -RandomCrop = register(T.RandomCrop) -Normalize = register(T.Normalize) - - - -@register -class Compose(T.Compose): - def __init__(self, ops) -> None: - transforms = [] - if ops is not None: - for op in ops: - if isinstance(op, dict): - name = op.pop('type') - transfom = getattr(GLOBAL_CONFIG[name]['_pymodule'], name)(**op) - transforms.append(transfom) - # op['type'] = name - elif isinstance(op, nn.Module): - transforms.append(op) - - else: - raise ValueError('') - else: - transforms =[EmptyTransform(), ] - - super().__init__(transforms=transforms) - - -@register -class EmptyTransform(T.Transform): - def __init__(self, ) -> None: - super().__init__() - - def forward(self, *inputs): - inputs = inputs if len(inputs) > 1 else inputs[0] - return inputs - - -@register -class PadToSize(T.Pad): - _transformed_types = ( - Image.Image, - datapoints.Image, - datapoints.Video, - datapoints.Mask, - datapoints.BoundingBox, - ) - def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: - sz = F.get_spatial_size(flat_inputs[0]) - h, w = self.spatial_size[0] - sz[0], self.spatial_size[1] - sz[1] - self.padding = [0, 0, w, h] - return dict(padding=self.padding) - - def __init__(self, spatial_size, fill=0, padding_mode='constant') -> None: - if isinstance(spatial_size, int): - spatial_size = (spatial_size, spatial_size) - - self.spatial_size = spatial_size - super().__init__(0, fill, padding_mode) - - def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: - fill = self._fill[type(inpt)] - padding = params['padding'] - return F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode) # type: ignore[arg-type] - - def __call__(self, *inputs: Any) -> Any: - outputs = super().forward(*inputs) - if len(outputs) > 1 and isinstance(outputs[1], dict): - outputs[1]['padding'] = torch.tensor(self.padding) - return outputs - - -@register -class RandomIoUCrop(T.RandomIoUCrop): - def __init__(self, min_scale: float = 0.3, max_scale: float = 1, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2, sampler_options: Optional[List[float]] = None, trials: int = 40, p: float = 1.0): - super().__init__(min_scale, max_scale, min_aspect_ratio, max_aspect_ratio, sampler_options, trials) - self.p = p - - def __call__(self, *inputs: Any) -> Any: - if torch.rand(1) >= self.p: - return 
-            return inputs if len(inputs) > 1 else inputs[0]
-
-        return super().forward(*inputs)
-
-
-@register
-class ConvertBox(T.Transform):
-    _transformed_types = (
-        datapoints.BoundingBox,
-    )
-    def __init__(self, out_fmt='', normalize=False) -> None:
-        super().__init__()
-        self.out_fmt = out_fmt
-        self.normalize = normalize
-
-        self.data_fmt = {
-            'xyxy': datapoints.BoundingBoxFormat.XYXY,
-            'cxcywh': datapoints.BoundingBoxFormat.CXCYWH
-        }
-
-    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
-        if self.out_fmt:
-            spatial_size = inpt.spatial_size
-            in_fmt = inpt.format.value.lower()
-            inpt = torchvision.ops.box_convert(inpt, in_fmt=in_fmt, out_fmt=self.out_fmt)
-            inpt = datapoints.BoundingBox(inpt, format=self.data_fmt[self.out_fmt], spatial_size=spatial_size)
-
-        if self.normalize:
-            inpt = inpt / torch.tensor(inpt.spatial_size[::-1]).tile(2)[None]
-
-        return inpt
-
diff --git a/src2/misc/__init__.py b/src2/misc/__init__.py
deleted file mode 100644
index 802b61ebff92ca70d0106ecbadfa206f0b79d964..0000000000000000000000000000000000000000
--- a/src2/misc/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-
-from .logger import *
-from .visualizer import *
diff --git a/src2/misc/__pycache__/__init__.cpython-310.pyc b/src2/misc/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 595da0f5dd749727cc81626c0d4dfa2b7b49173a..0000000000000000000000000000000000000000
Binary files a/src2/misc/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/src2/misc/__pycache__/dist.cpython-310.pyc b/src2/misc/__pycache__/dist.cpython-310.pyc
deleted file mode 100644
index 4afc70211adbda5971bb1b254563ecee00229774..0000000000000000000000000000000000000000
Binary files a/src2/misc/__pycache__/dist.cpython-310.pyc and /dev/null differ
diff --git a/src2/misc/__pycache__/logger.cpython-310.pyc b/src2/misc/__pycache__/logger.cpython-310.pyc
deleted file mode 100644
index 20bd9badb2f979692e66e50157a268e06ec26297..0000000000000000000000000000000000000000
Binary files a/src2/misc/__pycache__/logger.cpython-310.pyc and /dev/null differ
diff --git a/src2/misc/__pycache__/visualizer.cpython-310.pyc b/src2/misc/__pycache__/visualizer.cpython-310.pyc
deleted file mode 100644
index a7b9c6cdea69134b19cd12ad1b0e13b89e48f013..0000000000000000000000000000000000000000
Binary files a/src2/misc/__pycache__/visualizer.cpython-310.pyc and /dev/null differ
diff --git a/src2/misc/dist.py b/src2/misc/dist.py
deleted file mode 100644
index 4c547c001b9ccf62d386b18e877ee3a034a11d92..0000000000000000000000000000000000000000
--- a/src2/misc/dist.py
+++ /dev/null
@@ -1,190 +0,0 @@
-"""
-reference
-- https://github.com/pytorch/vision/blob/main/references/detection/utils.py
-- https://github.com/facebookresearch/detr/blob/master/util/misc.py#L406
-
-by lyuwenyu
-"""
-
-import random
-import numpy as np
-
-import torch
-import torch.nn as nn
-import torch.distributed
-import torch.distributed as tdist
-
-from torch.nn.parallel import DistributedDataParallel as DDP
-
-from torch.utils.data import DistributedSampler
-from torch.utils.data.dataloader import DataLoader
-
-
-def init_distributed():
-    '''
-    distributed setup
-    args:
-        backend (str), ('nccl', 'gloo')
-    '''
-    try:
-        # # https://pytorch.org/docs/stable/elastic/run.html
-        # LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))
-        # RANK = int(os.getenv('RANK', -1))
-        # WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
-
-        tdist.init_process_group(init_method='env://', )
-        torch.distributed.barrier()
-
-        rank = get_rank()
-        device = torch.device(f'cuda:{rank}')
-        torch.cuda.set_device(device)
-
-        setup_print(rank == 0)
-        print('Initialized distributed mode...')
-
-        return True
-
-    except:
-        print('Not init distributed mode.')
-        return False
-
-
-def setup_print(is_main):
-    '''This function disables printing when not in master process
-    '''
-    import builtins as __builtin__
-    builtin_print = __builtin__.print
-
-    def print(*args, **kwargs):
-        force = kwargs.pop('force', False)
-        if is_main or force:
-            builtin_print(*args, **kwargs)
-
-    __builtin__.print = print
-
-
-def is_dist_available_and_initialized():
-    if not tdist.is_available():
-        return False
-    if not tdist.is_initialized():
-        return False
-    return True
-
-
-def get_rank():
-    if not is_dist_available_and_initialized():
-        return 0
-    return tdist.get_rank()
-
-
-def get_world_size():
-    if not is_dist_available_and_initialized():
-        return 1
-    return tdist.get_world_size()
-
-
-def is_main_process():
-    return get_rank() == 0
-
-
-def save_on_master(*args, **kwargs):
-    if is_main_process():
-        torch.save(*args, **kwargs)
-
-
-def warp_model(model, find_unused_parameters=False, sync_bn=False,):
-    if is_dist_available_and_initialized():
-        rank = get_rank()
-        model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if sync_bn else model
-        model = DDP(model, device_ids=[rank], output_device=rank, find_unused_parameters=find_unused_parameters)
-    return model
-
-
-def warp_loader(loader, shuffle=False):
-    if is_dist_available_and_initialized():
-        sampler = DistributedSampler(loader.dataset, shuffle=shuffle)
-        loader = DataLoader(loader.dataset,
-                            loader.batch_size,
-                            sampler=sampler,
-                            drop_last=loader.drop_last,
-                            collate_fn=loader.collate_fn,
-                            pin_memory=loader.pin_memory,
-                            num_workers=loader.num_workers, )
-    return loader
-
-
-def is_parallel(model) -> bool:
-    # Returns True if model is of type DP or DDP
-    return type(model) in (torch.nn.parallel.DataParallel, torch.nn.parallel.DistributedDataParallel)
-
-
-def de_parallel(model) -> nn.Module:
-    # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
-    return model.module if is_parallel(model) else model
-
-
-def reduce_dict(data, avg=True):
-    '''
-    Args
-        data dict: input, {k: v, ...}
-        avg bool: true
-    '''
-    world_size = get_world_size()
-    if world_size < 2:
-        return data
-
-    with torch.no_grad():
-        keys, values = [], []
-        for k in sorted(data.keys()):
-            keys.append(k)
-            values.append(data[k])
-
-        values = torch.stack(values, dim=0)
-        tdist.all_reduce(values)
-
-        if avg is True:
-            values /= world_size
-
-        _data = {k: v for k, v in zip(keys, values)}
-
-    return _data
-
-
-def all_gather(data):
-    """
-    Run all_gather on arbitrary picklable data (not necessarily tensors)
-    Args:
-        data: any picklable object
-    Returns:
-        list[data]: list of data gathered from each rank
-    """
-    world_size = get_world_size()
-    if world_size == 1:
-        return [data]
-    data_list = [None] * world_size
-    tdist.all_gather_object(data_list, data)
-    return data_list
-
-
-import time
-def sync_time():
-    '''sync_time
-    '''
-    if torch.cuda.is_available():
-        torch.cuda.synchronize()
-
-    return time.time()
-
-
-def set_seed(seed):
-    # fix the seed for reproducibility
-    seed = seed + get_rank()
-    torch.manual_seed(seed)
-    np.random.seed(seed)
-    random.seed(seed)
-
-
diff --git a/src2/misc/logger.py b/src2/misc/logger.py
deleted file mode 100644
index 67405304dd29738a82866b0af0803d1007a661d6..0000000000000000000000000000000000000000
--- a/src2/misc/logger.py
+++ /dev/null
@@ -1,239 +0,0 @@
-"""
All Rights Reserved -https://github.com/facebookresearch/detr/blob/main/util/misc.py -Mostly copy-paste from torchvision references. -""" - -import time -import pickle -import datetime -from collections import defaultdict, deque -from typing import Dict - -import torch -import torch.distributed as tdist - -from .dist import is_dist_available_and_initialized, get_world_size - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. - """ - - def __init__(self, window_size=20, fmt=None): - if fmt is None: - fmt = "{median:.4f} ({global_avg:.4f})" - self.deque = deque(maxlen=window_size) - self.total = 0.0 - self.count = 0 - self.fmt = fmt - - def update(self, value, n=1): - self.deque.append(value) - self.count += n - self.total += value * n - - def synchronize_between_processes(self): - """ - Warning: does not synchronize the deque! - """ - if not is_dist_available_and_initialized(): - return - t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') - tdist.barrier() - tdist.all_reduce(t) - t = t.tolist() - self.count = int(t[0]) - self.total = t[1] - - @property - def median(self): - d = torch.tensor(list(self.deque)) - return d.median().item() - - @property - def avg(self): - d = torch.tensor(list(self.deque), dtype=torch.float32) - return d.mean().item() - - @property - def global_avg(self): - return self.total / self.count - - @property - def max(self): - return max(self.deque) - - @property - def value(self): - return self.deque[-1] - - def __str__(self): - return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) - - -def all_gather(data): - """ - Run all_gather on arbitrary picklable data (not necessarily tensors) - Args: - data: any picklable object - Returns: - list[data]: list of data gathered from each rank - """ - world_size = get_world_size() - if world_size == 1: - return [data] - - # serialized to a Tensor - buffer = pickle.dumps(data) - storage = torch.ByteStorage.from_buffer(buffer) - tensor = torch.ByteTensor(storage).to("cuda") - - # obtain Tensor size of each rank - local_size = torch.tensor([tensor.numel()], device="cuda") - size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] - tdist.all_gather(size_list, local_size) - size_list = [int(size.item()) for size in size_list] - max_size = max(size_list) - - # receiving Tensor from all ranks - # we pad the tensor because torch all_gather does not support - # gathering tensors of different shapes - tensor_list = [] - for _ in size_list: - tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) - if local_size != max_size: - padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") - tensor = torch.cat((tensor, padding), dim=0) - tdist.all_gather(tensor_list, tensor) - - data_list = [] - for size, tensor in zip(size_list, tensor_list): - buffer = tensor.cpu().numpy().tobytes()[:size] - data_list.append(pickle.loads(buffer)) - - return data_list - - -def reduce_dict(input_dict, average=True) -> Dict[str, torch.Tensor]: - """ - Args: - input_dict (dict): all the values will be reduced - average (bool): whether to do average or sum - Reduce the values in the dictionary from all processes so that all processes - have the averaged results. Returns a dict with the same fields as - input_dict, after reduction. 
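
# Quick standalone check of the SmoothedValue semantics above (assumes the
# module is importable as src.misc.logger):
from src.misc.logger import SmoothedValue

v = SmoothedValue(window_size=3, fmt='{median:.2f} ({global_avg:.2f})')
for x in [1.0, 2.0, 3.0, 4.0]:
    v.update(x)

print(v.median)       # 3.0  -> median of the last 3 values [2, 3, 4]
print(v.global_avg)   # 2.5  -> (1 + 2 + 3 + 4) / 4 over the whole series
print(str(v))         # '3.00 (2.50)'
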
- """ - world_size = get_world_size() - if world_size < 2: - return input_dict - with torch.no_grad(): - names = [] - values = [] - # sort the keys so that they are consistent across processes - for k in sorted(input_dict.keys()): - names.append(k) - values.append(input_dict[k]) - values = torch.stack(values, dim=0) - tdist.all_reduce(values) - if average: - values /= world_size - reduced_dict = {k: v for k, v in zip(names, values)} - return reduced_dict - - -class MetricLogger(object): - def __init__(self, delimiter="\t"): - self.meters = defaultdict(SmoothedValue) - self.delimiter = delimiter - - def update(self, **kwargs): - for k, v in kwargs.items(): - if isinstance(v, torch.Tensor): - v = v.item() - assert isinstance(v, (float, int)) - self.meters[k].update(v) - - def __getattr__(self, attr): - if attr in self.meters: - return self.meters[attr] - if attr in self.__dict__: - return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) - - def __str__(self): - loss_str = [] - for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) - return self.delimiter.join(loss_str) - - def synchronize_between_processes(self): - for meter in self.meters.values(): - meter.synchronize_between_processes() - - def add_meter(self, name, meter): - self.meters[name] = meter - - def log_every(self, iterable, print_freq, header=None): - i = 0 - if not header: - header = '' - start_time = time.time() - end = time.time() - iter_time = SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' - if torch.cuda.is_available(): - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}', - 'max mem: {memory:.0f}' - ]) - else: - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}' - ]) - MB = 1024.0 * 1024.0 - for obj in iterable: - data_time.update(time.time() - end) - yield obj - iter_time.update(time.time() - end) - if i % print_freq == 0 or i == len(iterable) - 1: - eta_seconds = iter_time.global_avg * (len(iterable) - i) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - if torch.cuda.is_available(): - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=torch.cuda.max_memory_allocated() / MB)) - else: - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time))) - i += 1 - end = time.time() - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {} ({:.4f} s / it)'.format( - header, total_time_str, total_time / len(iterable))) - diff --git a/src2/misc/visualizer.py b/src2/misc/visualizer.py deleted file mode 100644 index 843f8eb4ed5090602d9facdd9180d182d7e4f74e..0000000000000000000000000000000000000000 --- a/src2/misc/visualizer.py +++ /dev/null @@ -1,34 +0,0 @@ -""""by lyuwenyu -""" - -import torch -import torch.utils.data - -import torchvision -torchvision.disable_beta_transforms_warning() - -import PIL - -__all__ = ['show_sample'] - -def show_sample(sample): - """for coco dataset/dataloader - """ - import matplotlib.pyplot as plt - from torchvision.transforms.v2 import functional as F - from torchvision.utils import draw_bounding_boxes - - 
image, target = sample - if isinstance(image, PIL.Image.Image): - image = F.to_image_tensor(image) - - image = F.convert_dtype(image, torch.uint8) - annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3) - - fig, ax = plt.subplots() - ax.imshow(annotated_image.permute(1, 2, 0).numpy()) - ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) - fig.tight_layout() - fig.show() - plt.show() - diff --git a/src2/nn/__init__.py b/src2/nn/__init__.py deleted file mode 100644 index 7df8a1c0891e690a53162f53fc4a14b90a1351dd..0000000000000000000000000000000000000000 --- a/src2/nn/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ - -from .arch import * -from .criterion import * - -# -from .backbone import * - diff --git a/src2/nn/__pycache__/__init__.cpython-310.pyc b/src2/nn/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 02efe3bd64fc3037a7a5f51f7b39e38943a827c7..0000000000000000000000000000000000000000 Binary files a/src2/nn/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/src2/nn/arch/__init__.py b/src2/nn/arch/__init__.py deleted file mode 100644 index 070f19b2f344b67a194424e4f4cf9b5d824ee8f2..0000000000000000000000000000000000000000 --- a/src2/nn/arch/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .classification import * diff --git a/src2/nn/arch/__pycache__/__init__.cpython-310.pyc b/src2/nn/arch/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 035fbdd7dd72670e38a655b33378fba40869d39c..0000000000000000000000000000000000000000 Binary files a/src2/nn/arch/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/src2/nn/arch/__pycache__/classification.cpython-310.pyc b/src2/nn/arch/__pycache__/classification.cpython-310.pyc deleted file mode 100644 index 4af4b32605d35dbaa20718dfe10569db00d98ecf..0000000000000000000000000000000000000000 Binary files a/src2/nn/arch/__pycache__/classification.cpython-310.pyc and /dev/null differ diff --git a/src2/nn/arch/classification.py b/src2/nn/arch/classification.py deleted file mode 100644 index 2f1fa568ff12517b3e9d47bf464180b16964b088..0000000000000000000000000000000000000000 --- a/src2/nn/arch/classification.py +++ /dev/null @@ -1,41 +0,0 @@ -import torch -import torch.nn as nn - -from src.core import register - - -__all__ = ['Classification', 'ClassHead'] - - -@register -class Classification(nn.Module): - __inject__ = ['backbone', 'head'] - - def __init__(self, backbone: nn.Module, head: nn.Module=None): - super().__init__() - - self.backbone = backbone - self.head = head - - def forward(self, x): - x = self.backbone(x) - - if self.head is not None: - x = self.head(x) - - return x - - -@register -class ClassHead(nn.Module): - def __init__(self, hidden_dim, num_classes): - super().__init__() - self.pool = nn.AdaptiveAvgPool2d(1) - self.proj = nn.Linear(hidden_dim, num_classes) - - def forward(self, x): - x = x[0] if isinstance(x, (list, tuple)) else x - x = self.pool(x) - x = x.reshape(x.shape[0], -1) - x = self.proj(x) - return x diff --git a/src2/nn/backbone/__init__.py b/src2/nn/backbone/__init__.py deleted file mode 100644 index ea44c6b430e4d814607d4a0f986463a357d781d0..0000000000000000000000000000000000000000 --- a/src2/nn/backbone/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ - -from .presnet import * -from .test_resnet import * - -from .common import * \ No newline at end of file diff --git a/src2/nn/backbone/__pycache__/__init__.cpython-310.pyc b/src2/nn/backbone/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 
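
# Shape sanity-check for the ClassHead defined in classification.py above
# (hedged; assumes the repo's src.* imports resolve):
import torch
from src.nn.arch.classification import ClassHead

head = ClassHead(hidden_dim=512, num_classes=10)
feats = torch.randn(2, 512, 7, 7)             # a backbone feature map
print(head(feats).shape)                      # torch.Size([2, 10])

# A list/tuple of feature maps is accepted too; only the first is used:
print(head([feats, torch.randn(2, 512, 4, 4)]).shape)   # torch.Size([2, 10])
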
6e2121957befc8519c57baf1104c2f73c3698601..0000000000000000000000000000000000000000 Binary files a/src2/nn/backbone/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/src2/nn/backbone/__pycache__/common.cpython-310.pyc b/src2/nn/backbone/__pycache__/common.cpython-310.pyc deleted file mode 100644 index 6d09935ee09ef7be5511d22a7357e00fde496c05..0000000000000000000000000000000000000000 Binary files a/src2/nn/backbone/__pycache__/common.cpython-310.pyc and /dev/null differ diff --git a/src2/nn/backbone/__pycache__/presnet.cpython-310.pyc b/src2/nn/backbone/__pycache__/presnet.cpython-310.pyc deleted file mode 100644 index ed61188ae41893df48d61d02f69bf5b10dda4dbd..0000000000000000000000000000000000000000 Binary files a/src2/nn/backbone/__pycache__/presnet.cpython-310.pyc and /dev/null differ diff --git a/src2/nn/backbone/__pycache__/test_resnet.cpython-310.pyc b/src2/nn/backbone/__pycache__/test_resnet.cpython-310.pyc deleted file mode 100644 index 1cd30af8b5f734fcec2ad02351b403f17bbad72c..0000000000000000000000000000000000000000 Binary files a/src2/nn/backbone/__pycache__/test_resnet.cpython-310.pyc and /dev/null differ diff --git a/src2/nn/backbone/common.py b/src2/nn/backbone/common.py deleted file mode 100644 index 72e38d7d8e9f1460ad09eea8d02e16c133a6e054..0000000000000000000000000000000000000000 --- a/src2/nn/backbone/common.py +++ /dev/null @@ -1,102 +0,0 @@ -'''by lyuwenyu -''' - -import torch -import torch.nn as nn - - - -class ConvNormLayer(nn.Module): - def __init__(self, ch_in, ch_out, kernel_size, stride, padding=None, bias=False, act=None): - super().__init__() - self.conv = nn.Conv2d( - ch_in, - ch_out, - kernel_size, - stride, - padding=(kernel_size-1)//2 if padding is None else padding, - bias=bias) - self.norm = nn.BatchNorm2d(ch_out) - self.act = nn.Identity() if act is None else get_activation(act) - - def forward(self, x): - return self.act(self.norm(self.conv(x))) - - -class FrozenBatchNorm2d(nn.Module): - """copy and modified from https://github.com/facebookresearch/detr/blob/master/models/backbone.py - BatchNorm2d where the batch statistics and the affine parameters are fixed. - Copy-paste from torchvision.misc.ops with added eps before rqsrt, - without which any other models than torchvision.models.resnet[18,34,50,101] - produce nans. 
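
# Two hedged sanity-checks for common.py (assuming the repo imports resolve).
# First, ConvNormLayer: with the default padding=(kernel_size-1)//2, stride-1
# convs preserve spatial size and stride-2 convs halve it.
import torch
import torch.nn as nn
from src.nn.backbone.common import ConvNormLayer, FrozenBatchNorm2d

x = torch.randn(1, 3, 64, 64)
print(ConvNormLayer(3, 32, 3, 1)(x).shape)               # [1, 32, 64, 64]
print(ConvNormLayer(3, 32, 3, 2)(x).shape)               # [1, 32, 32, 32]
print(ConvNormLayer(3, 32, 1, 1, act='relu')(x).shape)   # [1, 32, 64, 64]

# Second, FrozenBatchNorm2d (its implementation continues just below): with
# identical statistics it matches an eval-mode BatchNorm2d, and its
# state-dict hook drops num_batches_tracked on load.
bn = nn.BatchNorm2d(8).eval()
bn.weight.data.uniform_(0.5, 1.5)
bn.bias.data.uniform_(-0.5, 0.5)
bn.running_mean.uniform_(-1.0, 1.0)
bn.running_var.uniform_(0.5, 1.5)

fbn = FrozenBatchNorm2d(8)
fbn.load_state_dict(bn.state_dict())

y = torch.randn(2, 8, 4, 4)
with torch.no_grad():
    print(torch.allclose(bn(y), fbn(y), atol=1e-5))      # True
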
- """ - def __init__(self, num_features, eps=1e-5): - super(FrozenBatchNorm2d, self).__init__() - n = num_features - self.register_buffer("weight", torch.ones(n)) - self.register_buffer("bias", torch.zeros(n)) - self.register_buffer("running_mean", torch.zeros(n)) - self.register_buffer("running_var", torch.ones(n)) - self.eps = eps - self.num_features = n - - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs): - num_batches_tracked_key = prefix + 'num_batches_tracked' - if num_batches_tracked_key in state_dict: - del state_dict[num_batches_tracked_key] - - super(FrozenBatchNorm2d, self)._load_from_state_dict( - state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs) - - def forward(self, x): - # move reshapes to the beginning - # to make it fuser-friendly - w = self.weight.reshape(1, -1, 1, 1) - b = self.bias.reshape(1, -1, 1, 1) - rv = self.running_var.reshape(1, -1, 1, 1) - rm = self.running_mean.reshape(1, -1, 1, 1) - scale = w * (rv + self.eps).rsqrt() - bias = b - rm * scale - return x * scale + bias - - def extra_repr(self): - return ( - "{num_features}, eps={eps}".format(**self.__dict__) - ) - - -def get_activation(act: str, inpace: bool=True): - '''get activation - ''' - act = act.lower() - - if act == 'silu': - m = nn.SiLU() - - elif act == 'relu': - m = nn.ReLU() - - elif act == 'leaky_relu': - m = nn.LeakyReLU() - - elif act == 'silu': - m = nn.SiLU() - - elif act == 'gelu': - m = nn.GELU() - - elif act is None: - m = nn.Identity() - - elif isinstance(act, nn.Module): - m = act - - else: - raise RuntimeError('') - - if hasattr(m, 'inplace'): - m.inplace = inpace - - return m diff --git a/src2/nn/backbone/presnet.py b/src2/nn/backbone/presnet.py deleted file mode 100644 index 2a6b4baa86432bf89809c65ac28743d21c7ceb38..0000000000000000000000000000000000000000 --- a/src2/nn/backbone/presnet.py +++ /dev/null @@ -1,225 +0,0 @@ -'''by lyuwenyu -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - -from collections import OrderedDict - -from .common import get_activation, ConvNormLayer, FrozenBatchNorm2d - -from src.core import register - - -__all__ = ['PResNet'] - - -ResNet_cfg = { - 18: [2, 2, 2, 2], - 34: [3, 4, 6, 3], - 50: [3, 4, 6, 3], - 101: [3, 4, 23, 3], - # 152: [3, 8, 36, 3], -} - - -donwload_url = { - 18: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet18_vd_pretrained_from_paddle.pth', - 34: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet34_vd_pretrained_from_paddle.pth', - 50: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet50_vd_ssld_v2_pretrained_from_paddle.pth', - 101: 'https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet101_vd_ssld_pretrained_from_paddle.pth', -} - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, ch_in, ch_out, stride, shortcut, act='relu', variant='b'): - super().__init__() - - self.shortcut = shortcut - - if not shortcut: - if variant == 'd' and stride == 2: - self.short = nn.Sequential(OrderedDict([ - ('pool', nn.AvgPool2d(2, 2, 0, ceil_mode=True)), - ('conv', ConvNormLayer(ch_in, ch_out, 1, 1)) - ])) - else: - self.short = ConvNormLayer(ch_in, ch_out, 1, stride) - - self.branch2a = ConvNormLayer(ch_in, ch_out, 3, stride, act=act) - self.branch2b = ConvNormLayer(ch_out, ch_out, 3, 1, act=None) - self.act = nn.Identity() if act is None else get_activation(act) - - - def forward(self, x): - out = self.branch2a(x) - out = 
self.branch2b(out) - if self.shortcut: - short = x - else: - short = self.short(x) - - out = out + short - out = self.act(out) - - return out - - -class BottleNeck(nn.Module): - expansion = 4 - - def __init__(self, ch_in, ch_out, stride, shortcut, act='relu', variant='b'): - super().__init__() - - if variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - width = ch_out - - self.branch2a = ConvNormLayer(ch_in, width, 1, stride1, act=act) - self.branch2b = ConvNormLayer(width, width, 3, stride2, act=act) - self.branch2c = ConvNormLayer(width, ch_out * self.expansion, 1, 1) - - self.shortcut = shortcut - if not shortcut: - if variant == 'd' and stride == 2: - self.short = nn.Sequential(OrderedDict([ - ('pool', nn.AvgPool2d(2, 2, 0, ceil_mode=True)), - ('conv', ConvNormLayer(ch_in, ch_out * self.expansion, 1, 1)) - ])) - else: - self.short = ConvNormLayer(ch_in, ch_out * self.expansion, 1, stride) - - self.act = nn.Identity() if act is None else get_activation(act) - - def forward(self, x): - out = self.branch2a(x) - out = self.branch2b(out) - out = self.branch2c(out) - - if self.shortcut: - short = x - else: - short = self.short(x) - - out = out + short - out = self.act(out) - - return out - - -class Blocks(nn.Module): - def __init__(self, block, ch_in, ch_out, count, stage_num, act='relu', variant='b'): - super().__init__() - - self.blocks = nn.ModuleList() - for i in range(count): - self.blocks.append( - block( - ch_in, - ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - shortcut=False if i == 0 else True, - variant=variant, - act=act) - ) - - if i == 0: - ch_in = ch_out * block.expansion - - def forward(self, x): - out = x - for block in self.blocks: - out = block(out) - return out - - -@register -class PResNet(nn.Module): - def __init__( - self, - depth, - variant='d', - num_stages=4, - return_idx=[0, 1, 2, 3], - act='relu', - freeze_at=-1, - freeze_norm=True, - pretrained=False): - super().__init__() - - block_nums = ResNet_cfg[depth] - ch_in = 64 - if variant in ['c', 'd']: - conv_def = [ - [3, ch_in // 2, 3, 2, "conv1_1"], - [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"], - [ch_in // 2, ch_in, 3, 1, "conv1_3"], - ] - else: - conv_def = [[3, ch_in, 7, 2, "conv1_1"]] - - self.conv1 = nn.Sequential(OrderedDict([ - (_name, ConvNormLayer(c_in, c_out, k, s, act=act)) for c_in, c_out, k, s, _name in conv_def - ])) - - ch_out_list = [64, 128, 256, 512] - block = BottleNeck if depth >= 50 else BasicBlock - - _out_channels = [block.expansion * v for v in ch_out_list] - _out_strides = [4, 8, 16, 32] - - self.res_layers = nn.ModuleList() - for i in range(num_stages): - stage_num = i + 2 - self.res_layers.append( - Blocks(block, ch_in, ch_out_list[i], block_nums[i], stage_num, act=act, variant=variant) - ) - ch_in = _out_channels[i] - - self.return_idx = return_idx - self.out_channels = [_out_channels[_i] for _i in return_idx] - self.out_strides = [_out_strides[_i] for _i in return_idx] - - if freeze_at >= 0: - self._freeze_parameters(self.conv1) - for i in range(min(freeze_at, num_stages)): - self._freeze_parameters(self.res_layers[i]) - - if freeze_norm: - self._freeze_norm(self) - - if pretrained: - state = torch.hub.load_state_dict_from_url(donwload_url[depth]) - self.load_state_dict(state) - print(f'Load PResNet{depth} state_dict') - - def _freeze_parameters(self, m: nn.Module): - for p in m.parameters(): - p.requires_grad = False - - def _freeze_norm(self, m: nn.Module): - if isinstance(m, nn.BatchNorm2d): - m = FrozenBatchNorm2d(m.num_features) - else: - 
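
# Stride bookkeeping for the residual stages above (hedged; assumes presnet
# is importable): stage 2 keeps stride 1, later stages downsample by 2 in
# their first block.
import torch
from src.nn.backbone.presnet import BasicBlock, Blocks

stage2 = Blocks(BasicBlock, ch_in=64, ch_out=64, count=2, stage_num=2)
stage3 = Blocks(BasicBlock, ch_in=64, ch_out=128, count=2, stage_num=3)

x = torch.randn(1, 64, 56, 56)
print(stage2(x).shape)            # torch.Size([1, 64, 56, 56])
print(stage3(stage2(x)).shape)    # torch.Size([1, 128, 28, 28])
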
for name, child in m.named_children(): - _child = self._freeze_norm(child) - if _child is not child: - setattr(m, name, _child) - return m - - def forward(self, x): - conv1 = self.conv1(x) - x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1) - outs = [] - for idx, stage in enumerate(self.res_layers): - x = stage(x) - if idx in self.return_idx: - outs.append(x) - return outs - - diff --git a/src2/nn/backbone/test_resnet.py b/src2/nn/backbone/test_resnet.py deleted file mode 100644 index 6639d79ec6b9a11fdc756cd94db211d38d566b61..0000000000000000000000000000000000000000 --- a/src2/nn/backbone/test_resnet.py +++ /dev/null @@ -1,81 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -from collections import OrderedDict - - -from src.core import register - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, in_planes, planes, stride=1): - super(BasicBlock, self).__init__() - - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(self.expansion*planes) - ) - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.bn2(self.conv2(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - - -class _ResNet(nn.Module): - def __init__(self, block, num_blocks, num_classes=10): - super().__init__() - self.in_planes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - - self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) - - self.linear = nn.Linear(512 * block.expansion, num_classes) - - def _make_layer(self, block, planes, num_blocks, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -@register -class MResNet(nn.Module): - def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None: - super().__init__() - self.model = _ResNet(BasicBlock, num_blocks, num_classes) - - def forward(self, x): - return self.model(x) - diff --git a/src2/nn/backbone/utils.py b/src2/nn/backbone/utils.py deleted file mode 100644 index ee250b1c9389cc11d0f553b28f23567e2c3b6860..0000000000000000000000000000000000000000 --- a/src2/nn/backbone/utils.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py - -by lyuwenyu -""" - -from collections import OrderedDict -from typing import Dict, List - - -import torch.nn as nn - - -class IntermediateLayerGetter(nn.ModuleDict): - """ - Module wrapper that returns intermediate layers from a model - - 
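
# Multi-scale output check for the PResNet backbone above (hedged;
# pretrained=False so nothing is downloaded). With return_idx=[1, 2, 3] the
# feature strides are 8, 16, 32:
import torch
from src.nn.backbone.presnet import PResNet

backbone = PResNet(depth=50, return_idx=[1, 2, 3], pretrained=False)
feats = backbone(torch.randn(1, 3, 256, 256))
for stride, f in zip(backbone.out_strides, feats):
    print(stride, tuple(f.shape))
# 8  (1, 512, 32, 32)
# 16 (1, 1024, 16, 16)
# 32 (1, 2048, 8, 8)
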
It has a strong assumption that the modules have been registered - into the model in the same order as they are used. - This means that one should **not** reuse the same nn.Module - twice in the forward if you want this to work. - - Additionally, it is only able to query submodules that are directly - assigned to the model. So if `model` is passed, `model.feature1` can - be returned, but not `model.feature1.layer2`. - """ - - _version = 3 - - def __init__(self, model: nn.Module, return_layers: List[str]) -> None: - if not set(return_layers).issubset([name for name, _ in model.named_children()]): - raise ValueError("return_layers are not present in model. {}"\ - .format([name for name, _ in model.named_children()])) - orig_return_layers = return_layers - return_layers = {str(k): str(k) for k in return_layers} - layers = OrderedDict() - for name, module in model.named_children(): - layers[name] = module - if name in return_layers: - del return_layers[name] - if not return_layers: - break - - super().__init__(layers) - self.return_layers = orig_return_layers - - def forward(self, x): - # out = OrderedDict() - outputs = [] - for name, module in self.items(): - x = module(x) - if name in self.return_layers: - # out_name = self.return_layers[name] - # out[out_name] = x - outputs.append(x) - - return outputs - diff --git a/src2/nn/criterion/__init__.py b/src2/nn/criterion/__init__.py deleted file mode 100644 index 9804569a11aab6016ee0b00b46f3776ab759d63a..0000000000000000000000000000000000000000 --- a/src2/nn/criterion/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ - -import torch.nn as nn -from src.core import register - -CrossEntropyLoss = register(nn.CrossEntropyLoss) - diff --git a/src2/nn/criterion/__pycache__/__init__.cpython-310.pyc b/src2/nn/criterion/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 6b40472db141be1e9066464773da63303efced6d..0000000000000000000000000000000000000000 Binary files a/src2/nn/criterion/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/src2/nn/criterion/utils.py b/src2/nn/criterion/utils.py deleted file mode 100644 index 7d8833e30bb6ccf948abf838fca7bf7cdf012e16..0000000000000000000000000000000000000000 --- a/src2/nn/criterion/utils.py +++ /dev/null @@ -1,20 +0,0 @@ -import torch -import torchvision - - - -def format_target(targets): - ''' - Args: - targets (List[Dict]), - Return: - tensor (Tensor), [im_id, label, bbox,] - ''' - outputs = [] - for i, tgt in enumerate(targets): - boxes = torchvision.ops.box_convert(tgt['boxes'], in_fmt='xyxy', out_fmt='cxcywh') - labels = tgt['labels'].reshape(-1, 1) - im_ids = torch.ones_like(labels) * i - outputs.append(torch.cat([im_ids, labels, boxes], dim=1)) - - return torch.cat(outputs, dim=0) diff --git a/src2/optim/__init__.py b/src2/optim/__init__.py deleted file mode 100644 index 1bd7c81f9d09a57ef502c716a6f42566d9c17bae..0000000000000000000000000000000000000000 --- a/src2/optim/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ - -from .ema import * -from .optim import * -from .amp import * \ No newline at end of file diff --git a/src2/optim/__pycache__/__init__.cpython-310.pyc b/src2/optim/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 9f6159973a9f12b08b4adcc3e7429e207d8d00a2..0000000000000000000000000000000000000000 Binary files a/src2/optim/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/src2/optim/__pycache__/amp.cpython-310.pyc b/src2/optim/__pycache__/amp.cpython-310.pyc deleted file mode 100644 index 
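
# Usage sketch for IntermediateLayerGetter above with a stock torchvision
# backbone (hedged import path); unlike the torchvision original it takes a
# list of layer names and returns a list of features:
import torch
import torchvision
from src.nn.backbone.utils import IntermediateLayerGetter

body = IntermediateLayerGetter(torchvision.models.resnet18(weights=None),
                               return_layers=['layer2', 'layer3', 'layer4'])
feats = body(torch.randn(1, 3, 224, 224))
print([tuple(f.shape) for f in feats])
# [(1, 128, 28, 28), (1, 256, 14, 14), (1, 512, 7, 7)]
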
3dcd62279cfd81c973ce4d1bda89a7f1f95b4a08..0000000000000000000000000000000000000000 Binary files a/src2/optim/__pycache__/amp.cpython-310.pyc and /dev/null differ diff --git a/src2/optim/__pycache__/ema.cpython-310.pyc b/src2/optim/__pycache__/ema.cpython-310.pyc deleted file mode 100644 index edd7ed7fefcb8a0ae1bc1d06b6894533e636e91f..0000000000000000000000000000000000000000 Binary files a/src2/optim/__pycache__/ema.cpython-310.pyc and /dev/null differ diff --git a/src2/optim/__pycache__/optim.cpython-310.pyc b/src2/optim/__pycache__/optim.cpython-310.pyc deleted file mode 100644 index 6bd5a90f9289a79a3d2a9d7993d8ecc43413412d..0000000000000000000000000000000000000000 Binary files a/src2/optim/__pycache__/optim.cpython-310.pyc and /dev/null differ diff --git a/src2/optim/amp.py b/src2/optim/amp.py deleted file mode 100644 index e43d0212e445213b658ead34dd047ec17b74e541..0000000000000000000000000000000000000000 --- a/src2/optim/amp.py +++ /dev/null @@ -1,12 +0,0 @@ -import torch -import torch.nn as nn -import torch.cuda.amp as amp - - -from src.core import register -import src.misc.dist as dist - - -__all__ = ['GradScaler'] - -GradScaler = register(amp.grad_scaler.GradScaler) diff --git a/src2/optim/ema.py b/src2/optim/ema.py deleted file mode 100644 index bf962b3a7a8ef34a600053d3346444b3d17bcae1..0000000000000000000000000000000000000000 --- a/src2/optim/ema.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -reference: -https://github.com/ultralytics/yolov5/blob/master/utils/torch_utils.py#L404 - -by lyuwenyu -""" - -import torch -import torch.nn as nn - -import math -from copy import deepcopy - - - -from src.core import register -import src.misc.dist as dist - - -__all__ = ['ModelEMA'] - - - -@register -class ModelEMA(object): - """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models - Keep a moving average of everything in the model state_dict (parameters and buffers). - This is intended to allow functionality like - https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage - A smoothed version of the weights is necessary for some training schemes to perform well. - This class is sensitive where it is initialized in the sequence of model init, - GPU assignment and distributed training wrappers. 
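
# The GradScaler registered in amp.py above is the stock torch.cuda.amp one;
# a generic AMP step for reference (requires a CUDA device; det_engine later
# in this diff follows the same pattern):
import torch

model = torch.nn.Linear(10, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler()

x = torch.randn(8, 10, device='cuda')
y = torch.randn(8, 2, device='cuda')

with torch.autocast(device_type='cuda'):
    loss = torch.nn.functional.mse_loss(model(x), y)

scaler.scale(loss).backward()   # scale the loss to avoid fp16 grad underflow
scaler.step(optimizer)          # unscales grads; skips the step on inf/nan
scaler.update()
optimizer.zero_grad()
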
- """ - def __init__(self, model: nn.Module, decay: float=0.9999, warmups: int=2000): - super().__init__() - - # Create EMA - self.module = deepcopy(dist.de_parallel(model)).eval() # FP32 EMA - - # if next(model.parameters()).device.type != 'cpu': - # self.module.half() # FP16 EMA - - self.decay = decay - self.warmups = warmups - self.updates = 0 # number of EMA updates - # self.filter_no_grad = filter_no_grad - self.decay_fn = lambda x: decay * (1 - math.exp(-x / warmups)) # decay exponential ramp (to help early epochs) - - for p in self.module.parameters(): - p.requires_grad_(False) - - def update(self, model: nn.Module): - # Update EMA parameters - with torch.no_grad(): - self.updates += 1 - d = self.decay_fn(self.updates) - - msd = dist.de_parallel(model).state_dict() - for k, v in self.module.state_dict().items(): - if v.dtype.is_floating_point: - v *= d - v += (1 - d) * msd[k].detach() - - def to(self, *args, **kwargs): - self.module = self.module.to(*args, **kwargs) - return self - - def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): - # Update EMA attributes - self.copy_attr(self.module, model, include, exclude) - - @staticmethod - def copy_attr(a, b, include=(), exclude=()): - # Copy attributes from b to a, options to only include [...] and to exclude [...] - for k, v in b.__dict__.items(): - if (len(include) and k not in include) or k.startswith('_') or k in exclude: - continue - else: - setattr(a, k, v) - - def state_dict(self, ): - return dict(module=self.module.state_dict(), updates=self.updates, warmups=self.warmups) - - def load_state_dict(self, state): - self.module.load_state_dict(state['module']) - if 'updates' in state: - self.updates = state['updates'] - - def forwad(self, ): - raise RuntimeError('ema...') - - def extra_repr(self) -> str: - return f'decay={self.decay}, warmups={self.warmups}' - - - - -class ExponentialMovingAverage(torch.optim.swa_utils.AveragedModel): - """Maintains moving averages of model parameters using an exponential decay. - ``ema_avg = decay * avg_model_param + (1 - decay) * model_param`` - `torch.optim.swa_utils.AveragedModel `_ - is used to compute the EMA. 
- """ - def __init__(self, model, decay, device="cpu", use_buffers=True): - - self.decay_fn = lambda x: decay * (1 - math.exp(-x / 2000)) - - def ema_avg(avg_model_param, model_param, num_averaged): - decay = self.decay_fn(num_averaged) - return decay * avg_model_param + (1 - decay) * model_param - - super().__init__(model, device, ema_avg, use_buffers=use_buffers) - - - diff --git a/src2/optim/optim.py b/src2/optim/optim.py deleted file mode 100644 index b10bd82926b3f40dd63e9545ba24bfc3d8a3f651..0000000000000000000000000000000000000000 --- a/src2/optim/optim.py +++ /dev/null @@ -1,22 +0,0 @@ - -import torch -import torch.nn as nn -import torch.optim as optim -import torch.optim.lr_scheduler as lr_scheduler - -from src.core import register - - -__all__ = ['AdamW', 'SGD', 'Adam', 'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR'] - - - -SGD = register(optim.SGD) -Adam = register(optim.Adam) -AdamW = register(optim.AdamW) - - -MultiStepLR = register(lr_scheduler.MultiStepLR) -CosineAnnealingLR = register(lr_scheduler.CosineAnnealingLR) -OneCycleLR = register(lr_scheduler.OneCycleLR) -LambdaLR = register(lr_scheduler.LambdaLR) diff --git a/src2/solver/__init__.py b/src2/solver/__init__.py deleted file mode 100644 index eddab7bf7b9a33bfb28f837b8018fb6bd4690614..0000000000000000000000000000000000000000 --- a/src2/solver/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -"""by lyuwenyu -""" - -from .solver import BaseSolver -from .det_solver import DetSolver - - -from typing import Dict - -TASKS :Dict[str, BaseSolver] = { - 'detection': DetSolver, -} \ No newline at end of file diff --git a/src2/solver/__pycache__/__init__.cpython-310.pyc b/src2/solver/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index dcb55b231b24dc158401a9610e1aec1ab57b5d15..0000000000000000000000000000000000000000 Binary files a/src2/solver/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/src2/solver/__pycache__/det_engine.cpython-310.pyc b/src2/solver/__pycache__/det_engine.cpython-310.pyc deleted file mode 100644 index 4dffcd12597fc7384380da4375cca959c9311b3c..0000000000000000000000000000000000000000 Binary files a/src2/solver/__pycache__/det_engine.cpython-310.pyc and /dev/null differ diff --git a/src2/solver/__pycache__/det_solver.cpython-310.pyc b/src2/solver/__pycache__/det_solver.cpython-310.pyc deleted file mode 100644 index 67614f0eee063d3a78ae8157d9fe83152e3d8330..0000000000000000000000000000000000000000 Binary files a/src2/solver/__pycache__/det_solver.cpython-310.pyc and /dev/null differ diff --git a/src2/solver/__pycache__/solver.cpython-310.pyc b/src2/solver/__pycache__/solver.cpython-310.pyc deleted file mode 100644 index 6bc49ec567891a95ca3b281a2757c60c297ad5ad..0000000000000000000000000000000000000000 Binary files a/src2/solver/__pycache__/solver.cpython-310.pyc and /dev/null differ diff --git a/src2/solver/det_engine.py b/src2/solver/det_engine.py deleted file mode 100644 index 7de6b03ae2e4bcaa9167d634a46848c1c16416d9..0000000000000000000000000000000000000000 --- a/src2/solver/det_engine.py +++ /dev/null @@ -1,190 +0,0 @@ -""" -Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -https://github.com/facebookresearch/detr/blob/main/engine.py - -by lyuwenyu -""" - -import math -import os -import sys -import pathlib -from typing import Iterable - -import torch -import torch.amp - -from src.data import CocoEvaluator -from src.misc import (MetricLogger, SmoothedValue, reduce_dict) - - -def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, - data_loader: Iterable, optimizer: torch.optim.Optimizer, - device: torch.device, epoch: int, max_norm: float = 0, **kwargs): - model.train() - criterion.train() - metric_logger = MetricLogger(delimiter=" ") - metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}')) - # metric_logger.add_meter('class_error', SmoothedValue(window_size=1, fmt='{value:.2f}')) - header = 'Epoch: [{}]'.format(epoch) - print_freq = kwargs.get('print_freq', 10) - - ema = kwargs.get('ema', None) - scaler = kwargs.get('scaler', None) - - for samples, targets in metric_logger.log_every(data_loader, print_freq, header): - samples = samples.to(device) - targets = [{k: v.to(device) for k, v in t.items()} for t in targets] - - if scaler is not None: - with torch.autocast(device_type=str(device), cache_enabled=True): - outputs = model(samples, targets) - - with torch.autocast(device_type=str(device), enabled=False): - loss_dict = criterion(outputs, targets) - - loss = sum(loss_dict.values()) - scaler.scale(loss).backward() - - if max_norm > 0: - scaler.unscale_(optimizer) - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm) - - scaler.step(optimizer) - scaler.update() - optimizer.zero_grad() - - else: - outputs = model(samples, targets) - loss_dict = criterion(outputs, targets) - - loss = sum(loss_dict.values()) - optimizer.zero_grad() - loss.backward() - - if max_norm > 0: - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm) - - optimizer.step() - - # ema - if ema is not None: - ema.update(model) - - loss_dict_reduced = reduce_dict(loss_dict) - loss_value = sum(loss_dict_reduced.values()) - - if not math.isfinite(loss_value): - print("Loss is {}, stopping training".format(loss_value)) - print(loss_dict_reduced) - sys.exit(1) - - metric_logger.update(loss=loss_value, **loss_dict_reduced) - metric_logger.update(lr=optimizer.param_groups[0]["lr"]) - - # gather the stats from all processes - metric_logger.synchronize_between_processes() - print("Averaged stats:", metric_logger) - return {k: meter.global_avg for k, meter in metric_logger.meters.items()} - - - -@torch.no_grad() -def evaluate(model: torch.nn.Module, criterion: torch.nn.Module, postprocessors, data_loader, base_ds, device, output_dir): - model.eval() - criterion.eval() - - metric_logger = MetricLogger(delimiter=" ") - # metric_logger.add_meter('class_error', SmoothedValue(window_size=1, fmt='{value:.2f}')) - header = 'Test:' - - # iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys()) - iou_types = postprocessors.iou_types - coco_evaluator = CocoEvaluator(base_ds, iou_types) - # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75] - - panoptic_evaluator = None - # if 'panoptic' in postprocessors.keys(): - # panoptic_evaluator = PanopticEvaluator( - # data_loader.dataset.ann_file, - # data_loader.dataset.ann_folder, - # output_dir=os.path.join(output_dir, "panoptic_eval"), - # ) - - for samples, targets in metric_logger.log_every(data_loader, 10, header): - samples = samples.to(device) - targets = [{k: v.to(device) for k, v in t.items()} for t in targets] - - # with 
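
# What the logging path in train_one_epoch above does with loss_dict
# (standalone on one process, where reduce_dict is the identity):
import torch
from src.misc import reduce_dict

loss_dict = {'loss_bbox': torch.tensor(0.5), 'loss_cls': torch.tensor(1.0)}
loss_dict_reduced = reduce_dict(loss_dict)      # averaged across ranks when distributed
print(float(sum(loss_dict_reduced.values())))   # 1.5
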
torch.autocast(device_type=str(device)): - # outputs = model(samples) - - outputs = model(samples) - print(outputs) - # loss_dict = criterion(outputs, targets) - # weight_dict = criterion.weight_dict - # # reduce losses over all GPUs for logging purposes - # loss_dict_reduced = reduce_dict(loss_dict) - # loss_dict_reduced_scaled = {k: v * weight_dict[k] - # for k, v in loss_dict_reduced.items() if k in weight_dict} - # loss_dict_reduced_unscaled = {f'{k}_unscaled': v - # for k, v in loss_dict_reduced.items()} - # metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()), - # **loss_dict_reduced_scaled, - # **loss_dict_reduced_unscaled) - # metric_logger.update(class_error=loss_dict_reduced['class_error']) - - orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0) - results = postprocessors(outputs, orig_target_sizes) - # results = postprocessors(outputs, targets) - - # if 'segm' in postprocessors.keys(): - # target_sizes = torch.stack([t["size"] for t in targets], dim=0) - # results = postprocessors['segm'](results, outputs, orig_target_sizes, target_sizes) - - res = {target['image_id'].item(): output for target, output in zip(targets, results)} - if coco_evaluator is not None: - coco_evaluator.update(res) - - # if panoptic_evaluator is not None: - # res_pano = postprocessors["panoptic"](outputs, target_sizes, orig_target_sizes) - # for i, target in enumerate(targets): - # image_id = target["image_id"].item() - # file_name = f"{image_id:012d}.png" - # res_pano[i]["image_id"] = image_id - # res_pano[i]["file_name"] = file_name - # panoptic_evaluator.update(res_pano) - - # gather the stats from all processes - metric_logger.synchronize_between_processes() - print("Averaged stats:", metric_logger) - if coco_evaluator is not None: - coco_evaluator.synchronize_between_processes() - if panoptic_evaluator is not None: - panoptic_evaluator.synchronize_between_processes() - - # accumulate predictions from all images - if coco_evaluator is not None: - coco_evaluator.accumulate() - coco_evaluator.summarize() - - # panoptic_res = None - # if panoptic_evaluator is not None: - # panoptic_res = panoptic_evaluator.summarize() - - stats = {} - # stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} - if coco_evaluator is not None: - if 'bbox' in iou_types: - stats['coco_eval_bbox'] = coco_evaluator.coco_eval['bbox'].stats.tolist() - if 'segm' in iou_types: - stats['coco_eval_masks'] = coco_evaluator.coco_eval['segm'].stats.tolist() - - # if panoptic_res is not None: - # stats['PQ_all'] = panoptic_res["All"] - # stats['PQ_th'] = panoptic_res["Things"] - # stats['PQ_st'] = panoptic_res["Stuff"] - - return stats, coco_evaluator - - - diff --git a/src2/solver/det_solver.py b/src2/solver/det_solver.py deleted file mode 100644 index d0a0a8400cf851ccc641f97310059edf56db78ea..0000000000000000000000000000000000000000 --- a/src2/solver/det_solver.py +++ /dev/null @@ -1,104 +0,0 @@ -''' -by lyuwenyu -''' -import time -import json -import datetime - -import torch - -from src.misc import dist -from src.data import get_coco_api_from_dataset - -from .solver import BaseSolver -from .det_engine import train_one_epoch, evaluate - - -class DetSolver(BaseSolver): - - def fit(self, ): - print("Start training") - self.train() - - args = self.cfg - - n_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad) - print('number of params:', n_parameters) - - base_ds = get_coco_api_from_dataset(self.val_dataloader.dataset) - # best_stat = {'coco_eval_bbox': 0, 
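
# The evaluation loop above hands the postprocessor the original image sizes
# so boxes can be mapped back from network resolution; as a tensor sketch
# (the (h, w) ordering follows the DETR lineage of this code):
import torch

targets = [{'orig_size': torch.tensor([480, 640])},
           {'orig_size': torch.tensor([720, 1280])}]
orig_target_sizes = torch.stack([t['orig_size'] for t in targets], dim=0)
print(orig_target_sizes.shape)   # torch.Size([2, 2]) -- one size pair per image
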
'coco_eval_masks': 0, 'epoch': -1, } - best_stat = {'epoch': -1, } - - start_time = time.time() - for epoch in range(self.last_epoch + 1, args.epoches): - if dist.is_dist_available_and_initialized(): - self.train_dataloader.sampler.set_epoch(epoch) - - train_stats = train_one_epoch( - self.model, self.criterion, self.train_dataloader, self.optimizer, self.device, epoch, - args.clip_max_norm, print_freq=args.log_step, ema=self.ema, scaler=self.scaler) - - self.lr_scheduler.step() - - if self.output_dir: - checkpoint_paths = [self.output_dir / 'checkpoint.pth'] - # extra checkpoint before LR drop and every 100 epochs - if (epoch + 1) % args.checkpoint_step == 0: - checkpoint_paths.append(self.output_dir / f'checkpoint{epoch:04}.pth') - for checkpoint_path in checkpoint_paths: - dist.save_on_master(self.state_dict(epoch), checkpoint_path) - - module = self.ema.module if self.ema else self.model - test_stats, coco_evaluator = evaluate( - module, self.criterion, self.postprocessor, self.val_dataloader, base_ds, self.device, self.output_dir - ) - - # TODO - for k in test_stats.keys(): - if k in best_stat: - best_stat['epoch'] = epoch if test_stats[k][0] > best_stat[k] else best_stat['epoch'] - best_stat[k] = max(best_stat[k], test_stats[k][0]) - else: - best_stat['epoch'] = epoch - best_stat[k] = test_stats[k][0] - print('best_stat: ', best_stat) - - - log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, - **{f'test_{k}': v for k, v in test_stats.items()}, - 'epoch': epoch, - 'n_parameters': n_parameters} - - if self.output_dir and dist.is_main_process(): - with (self.output_dir / "log.txt").open("a") as f: - f.write(json.dumps(log_stats) + "\n") - - # for evaluation logs - if coco_evaluator is not None: - (self.output_dir / 'eval').mkdir(exist_ok=True) - if "bbox" in coco_evaluator.coco_eval: - filenames = ['latest.pth'] - if epoch % 50 == 0: - filenames.append(f'{epoch:03}.pth') - for name in filenames: - torch.save(coco_evaluator.coco_eval["bbox"].eval, - self.output_dir / "eval" / name) - - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) - - - def val(self, ): - self.eval() - - base_ds = get_coco_api_from_dataset(self.val_dataloader.dataset) - - module = self.ema.module if self.ema else self.model - test_stats, coco_evaluator = evaluate(module, self.criterion, self.postprocessor, - self.val_dataloader, base_ds, self.device, self.output_dir) - - if self.output_dir: - dist.save_on_master(coco_evaluator.coco_eval["bbox"].eval, self.output_dir / "eval.pth") - - return diff --git a/src2/solver/solver.py b/src2/solver/solver.py deleted file mode 100644 index 55452f28ff9d43b5cece8879e762017246f0a5f0..0000000000000000000000000000000000000000 --- a/src2/solver/solver.py +++ /dev/null @@ -1,182 +0,0 @@ -"""by lyuwenyu -""" - -import torch -import torch.nn as nn - -from datetime import datetime -from pathlib import Path -from typing import Dict - -from src.misc import dist -from src.core import BaseConfig - - -class BaseSolver(object): - def __init__(self, cfg: BaseConfig) -> None: - - self.cfg = cfg - - def setup(self, ): - '''Avoid instantiating unnecessary classes - ''' - cfg = self.cfg - device = cfg.device - self.device = device - self.last_epoch = cfg.last_epoch - - self.model = dist.warp_model(cfg.model.to(device), cfg.find_unused_parameters, cfg.sync_bn) - self.criterion = cfg.criterion.to(device) - self.postprocessor = cfg.postprocessor - - # NOTE (lvwenyu): should 
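
# How the best_stat bookkeeping in DetSolver.fit above behaves (standalone
# illustration with made-up AP values):
best_stat = {'epoch': -1}
for epoch, ap in enumerate([[0.30], [0.35], [0.33]]):
    test_stats = {'coco_eval_bbox': ap}
    for k in test_stats:
        if k in best_stat:
            if test_stats[k][0] > best_stat[k]:
                best_stat['epoch'] = epoch
            best_stat[k] = max(best_stat[k], test_stats[k][0])
        else:
            best_stat['epoch'] = epoch
            best_stat[k] = test_stats[k][0]
print(best_stat)   # {'epoch': 1, 'coco_eval_bbox': 0.35}
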
load_tuning_state before ema instance building - if self.cfg.tuning: - print(f'Tuning checkpoint from {self.cfg.tuning}') - self.load_tuning_state(self.cfg.tuning) - - self.scaler = cfg.scaler - self.ema = cfg.ema.to(device) if cfg.ema is not None else None - - self.output_dir = Path(cfg.output_dir) - self.output_dir.mkdir(parents=True, exist_ok=True) - - - def train(self, ): - self.setup() - self.optimizer = self.cfg.optimizer - self.lr_scheduler = self.cfg.lr_scheduler - - # NOTE instantiating order - if self.cfg.resume: - print(f'Resume checkpoint from {self.cfg.resume}') - self.resume(self.cfg.resume) - - self.train_dataloader = dist.warp_loader(self.cfg.train_dataloader, \ - shuffle=self.cfg.train_dataloader.shuffle) - self.val_dataloader = dist.warp_loader(self.cfg.val_dataloader, \ - shuffle=self.cfg.val_dataloader.shuffle) - - - def eval(self, ): - self.setup() - self.val_dataloader = dist.warp_loader(self.cfg.val_dataloader, \ - shuffle=self.cfg.val_dataloader.shuffle) - - if self.cfg.resume: - print(f'resume from {self.cfg.resume}') - self.resume(self.cfg.resume) - - - def state_dict(self, last_epoch): - '''state dict - ''' - state = {} - state['model'] = dist.de_parallel(self.model).state_dict() - state['date'] = datetime.now().isoformat() - - # TODO - state['last_epoch'] = last_epoch - - if self.optimizer is not None: - state['optimizer'] = self.optimizer.state_dict() - - if self.lr_scheduler is not None: - state['lr_scheduler'] = self.lr_scheduler.state_dict() - # state['last_epoch'] = self.lr_scheduler.last_epoch - - if self.ema is not None: - state['ema'] = self.ema.state_dict() - - if self.scaler is not None: - state['scaler'] = self.scaler.state_dict() - - return state - - - def load_state_dict(self, state): - '''load state dict - ''' - # TODO - if getattr(self, 'last_epoch', None) and 'last_epoch' in state: - self.last_epoch = state['last_epoch'] - print('Loading last_epoch') - - if getattr(self, 'model', None) and 'model' in state: - if dist.is_parallel(self.model): - self.model.module.load_state_dict(state['model']) - else: - self.model.load_state_dict(state['model']) - print('Loading model.state_dict') - - if getattr(self, 'ema', None) and 'ema' in state: - self.ema.load_state_dict(state['ema']) - print('Loading ema.state_dict') - - if getattr(self, 'optimizer', None) and 'optimizer' in state: - self.optimizer.load_state_dict(state['optimizer']) - print('Loading optimizer.state_dict') - - if getattr(self, 'lr_scheduler', None) and 'lr_scheduler' in state: - self.lr_scheduler.load_state_dict(state['lr_scheduler']) - print('Loading lr_scheduler.state_dict') - - if getattr(self, 'scaler', None) and 'scaler' in state: - self.scaler.load_state_dict(state['scaler']) - print('Loading scaler.state_dict') - - - def save(self, path): - '''save state - ''' - state = self.state_dict() - dist.save_on_master(state, path) - - - def resume(self, path): - '''load resume - ''' - # for cuda:0 memory - state = torch.load(path, map_location='cpu') - self.load_state_dict(state) - - def load_tuning_state(self, path,): - """only load model for tuning and skip missed/dismatched keys - """ - if 'http' in path: - state = torch.hub.load_state_dict_from_url(path, map_location='cpu') - else: - state = torch.load(path, map_location='cpu') - - module = dist.de_parallel(self.model) - - # TODO hard code - if 'ema' in state: - stat, infos = self._matched_state(module.state_dict(), state['ema']['module']) - else: - stat, infos = self._matched_state(module.state_dict(), state['model']) - - 
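
# Hedged sketch of the checkpoint cycle BaseSolver implements above
# (illustrative paths; assumes a configured solver instance):
from pathlib import Path
from src.misc import dist

def save_checkpoint(solver, epoch: int, output_dir: Path):
    # one dict bundles model/optimizer/lr_scheduler/ema/scaler plus last_epoch
    state = solver.state_dict(epoch)
    dist.save_on_master(state, output_dir / 'checkpoint.pth')

def resume_checkpoint(solver, path: str):
    # torch.load(..., map_location='cpu') first, then load_state_dict
    solver.resume(path)
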
module.load_state_dict(stat, strict=False) - print(f'Load model.state_dict, {infos}') - - @staticmethod - def _matched_state(state: Dict[str, torch.Tensor], params: Dict[str, torch.Tensor]): - missed_list = [] - unmatched_list = [] - matched_state = {} - for k, v in state.items(): - if k in params: - if v.shape == params[k].shape: - matched_state[k] = params[k] - else: - unmatched_list.append(k) - else: - missed_list.append(k) - - return matched_state, {'missed': missed_list, 'unmatched': unmatched_list} - - - def fit(self, ): - raise NotImplementedError('') - - def val(self, ): - raise NotImplementedError('') diff --git a/src2/zoo/__init__.py b/src2/zoo/__init__.py deleted file mode 100644 index e6c56d9a47b56332e968dad230c1feeff5a6d7c7..0000000000000000000000000000000000000000 --- a/src2/zoo/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - -from .rtdetr import * diff --git a/src2/zoo/__pycache__/__init__.cpython-310.pyc b/src2/zoo/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index f87e946803139840579325f6a731195415f7bf1a..0000000000000000000000000000000000000000 Binary files a/src2/zoo/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__init__.py b/src2/zoo/rtdetr/__init__.py deleted file mode 100644 index 1b4583b3b86e7c2ba4044f73d51e1254ca327fe5..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -"""by lyuwenyu -""" - - -from .rtdetr import * - -from .hybrid_encoder import * -from .rtdetr_decoder import * -from .rtdetr_postprocessor import * -from .rtdetr_criterion import * - -from .matcher import * diff --git a/src2/zoo/rtdetr/__pycache__/__init__.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 55915be0c74facacf9316e8f7a9d37b1bab7fa6e..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/box_ops.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/box_ops.cpython-310.pyc deleted file mode 100644 index 7e7bc53ff302d7c443a72130257a90f6b059f166..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/box_ops.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/denoising.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/denoising.cpython-310.pyc deleted file mode 100644 index cb7a0f7ce07129eb92c92be9675511bcf318ddb4..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/denoising.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/hybrid_encoder.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/hybrid_encoder.cpython-310.pyc deleted file mode 100644 index 3dd43758edf71ff8d71b33cc70f64111cc9cd0bf..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/hybrid_encoder.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/matcher.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/matcher.cpython-310.pyc deleted file mode 100644 index 0a1580e44731c7a09b699073a6d5f13df66e7bba..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/matcher.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/rtdetr.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/rtdetr.cpython-310.pyc deleted file mode 100644 index 97cfebdf5fb434e4899051354fab3f201632c3a3..0000000000000000000000000000000000000000 Binary files 
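
# Standalone check of _matched_state above: keys survive only when present
# in both dicts with identical shapes; the rest is reported for logging:
import torch
from src.solver.solver import BaseSolver

state  = {'head.weight': torch.zeros(10, 256), 'head.bias': torch.zeros(10)}
params = {'head.weight': torch.ones(10, 256),  'head.bias': torch.ones(80)}

matched, infos = BaseSolver._matched_state(state, params)
print(list(matched))   # ['head.weight'] -- shape match, value taken from params
print(infos)           # {'missed': [], 'unmatched': ['head.bias']}
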
a/src2/zoo/rtdetr/__pycache__/rtdetr.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/rtdetr_criterion.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/rtdetr_criterion.cpython-310.pyc deleted file mode 100644 index d8da511973e6f9fe191a4a139b121bbb08becbf6..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/rtdetr_criterion.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/rtdetr_decoder.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/rtdetr_decoder.cpython-310.pyc deleted file mode 100644 index d22961502e5e256188ed0bf9b69ab2e8bc49060f..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/rtdetr_decoder.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/rtdetr_postprocessor.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/rtdetr_postprocessor.cpython-310.pyc deleted file mode 100644 index 6c7780f0e9daa9166fd5da6e776432348092f583..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/rtdetr_postprocessor.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/__pycache__/utils.cpython-310.pyc b/src2/zoo/rtdetr/__pycache__/utils.cpython-310.pyc deleted file mode 100644 index f39417e3c51c33bf1e9e688f7386cefd3c03aa57..0000000000000000000000000000000000000000 Binary files a/src2/zoo/rtdetr/__pycache__/utils.cpython-310.pyc and /dev/null differ diff --git a/src2/zoo/rtdetr/box_ops.py b/src2/zoo/rtdetr/box_ops.py deleted file mode 100644 index 5d65866556d14cc866eac5d597d8a191528c65dc..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/box_ops.py +++ /dev/null @@ -1,89 +0,0 @@ -''' -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -https://github.com/facebookresearch/detr/blob/main/util/box_ops.py -''' - -import torch -from torchvision.ops.boxes import box_area - - -def box_cxcywh_to_xyxy(x): - x_c, y_c, w, h = x.unbind(-1) - b = [(x_c - 0.5 * w), (y_c - 0.5 * h), - (x_c + 0.5 * w), (y_c + 0.5 * h)] - return torch.stack(b, dim=-1) - - -def box_xyxy_to_cxcywh(x): - x0, y0, x1, y1 = x.unbind(-1) - b = [(x0 + x1) / 2, (y0 + y1) / 2, - (x1 - x0), (y1 - y0)] - return torch.stack(b, dim=-1) - - -# modified from torchvision to also return the union -def box_iou(boxes1, boxes2): - area1 = box_area(boxes1) - area2 = box_area(boxes2) - - lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] - rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] - - wh = (rb - lt).clamp(min=0) # [N,M,2] - inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] - - union = area1[:, None] + area2 - inter - - iou = inter / union - return iou, union - - -def generalized_box_iou(boxes1, boxes2): - """ - Generalized IoU from https://giou.stanford.edu/ - - The boxes should be in [x0, y0, x1, y1] format - - Returns a [N, M] pairwise matrix, where N = len(boxes1) - and M = len(boxes2) - """ - # degenerate boxes gives inf / nan results - # so do an early check - assert (boxes1[:, 2:] >= boxes1[:, :2]).all() - assert (boxes2[:, 2:] >= boxes2[:, :2]).all() - iou, union = box_iou(boxes1, boxes2) - - lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) - rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) - - wh = (rb - lt).clamp(min=0) # [N,M,2] - area = wh[:, :, 0] * wh[:, :, 1] - - return iou - (area - union) / area - - -def masks_to_boxes(masks): - """Compute the bounding boxes around the provided masks - - The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 
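
# Quick numeric check of generalized_box_iou above: identical boxes give 1,
# disjoint boxes go negative (standalone apart from the import):
import torch
from src.zoo.rtdetr.box_ops import generalized_box_iou

a = torch.tensor([[0., 0., 2., 2.]])
b = torch.tensor([[0., 0., 2., 2.],
                  [3., 0., 5., 2.]])
print(generalized_box_iou(a, b))   # tensor([[ 1.0000, -0.2000]])
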
- - Returns a [N, 4] tensors, with the boxes in xyxy format - """ - if masks.numel() == 0: - return torch.zeros((0, 4), device=masks.device) - - h, w = masks.shape[-2:] - - y = torch.arange(0, h, dtype=torch.float) - x = torch.arange(0, w, dtype=torch.float) - y, x = torch.meshgrid(y, x) - - x_mask = (masks * x.unsqueeze(0)) - x_max = x_mask.flatten(1).max(-1)[0] - x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] - - y_mask = (masks * y.unsqueeze(0)) - y_max = y_mask.flatten(1).max(-1)[0] - y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] - - return torch.stack([x_min, y_min, x_max, y_max], 1) \ No newline at end of file diff --git a/src2/zoo/rtdetr/denoising.py b/src2/zoo/rtdetr/denoising.py deleted file mode 100644 index 68307522f8152c8c224a2c38b9d48abeea2b046a..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/denoising.py +++ /dev/null @@ -1,125 +0,0 @@ -"""by lyuwenyu -""" - -import torch - -from .utils import inverse_sigmoid -from .box_ops import box_cxcywh_to_xyxy, box_xyxy_to_cxcywh - - - -def get_contrastive_denoising_training_group(targets, - num_classes, - num_queries, - class_embed, - num_denoising=100, - label_noise_ratio=0.5, - box_noise_scale=1.0,): - """cnd""" - if num_denoising <= 0: - return None, None, None, None - - num_gts = [len(t['labels']) for t in targets] - device = targets[0]['labels'].device - - max_gt_num = max(num_gts) - if max_gt_num == 0: - return None, None, None, None - - num_group = num_denoising // max_gt_num - num_group = 1 if num_group == 0 else num_group - # pad gt to max_num of a batch - bs = len(num_gts) - - input_query_class = torch.full([bs, max_gt_num], num_classes, dtype=torch.int32, device=device) - input_query_bbox = torch.zeros([bs, max_gt_num, 4], device=device) - pad_gt_mask = torch.zeros([bs, max_gt_num], dtype=torch.bool, device=device) - - for i in range(bs): - num_gt = num_gts[i] - if num_gt > 0: - input_query_class[i, :num_gt] = targets[i]['labels'] - input_query_bbox[i, :num_gt] = targets[i]['boxes'] - pad_gt_mask[i, :num_gt] = 1 - # each group has positive and negative queries. 
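
# masks_to_boxes sanity check for box_ops above: a filled rectangle recovers
# its own xyxy corners (meshgrid's default indexing warns on newer torch but
# still runs):
import torch
from src.zoo.rtdetr.box_ops import masks_to_boxes

mask = torch.zeros(1, 10, 10)
mask[0, 2:5, 3:8] = 1              # rows 2..4, cols 3..7
print(masks_to_boxes(mask))        # tensor([[3., 2., 7., 4.]])
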
- input_query_class = input_query_class.tile([1, 2 * num_group]) - input_query_bbox = input_query_bbox.tile([1, 2 * num_group, 1]) - pad_gt_mask = pad_gt_mask.tile([1, 2 * num_group]) - # positive and negative mask - negative_gt_mask = torch.zeros([bs, max_gt_num * 2, 1], device=device) - negative_gt_mask[:, max_gt_num:] = 1 - negative_gt_mask = negative_gt_mask.tile([1, num_group, 1]) - positive_gt_mask = 1 - negative_gt_mask - # contrastive denoising training positive index - positive_gt_mask = positive_gt_mask.squeeze(-1) * pad_gt_mask - dn_positive_idx = torch.nonzero(positive_gt_mask)[:, 1] - dn_positive_idx = torch.split(dn_positive_idx, [n * num_group for n in num_gts]) - # total denoising queries - num_denoising = int(max_gt_num * 2 * num_group) - - if label_noise_ratio > 0: - mask = torch.rand_like(input_query_class, dtype=torch.float) < (label_noise_ratio * 0.5) - # randomly put a new one here - new_label = torch.randint_like(mask, 0, num_classes, dtype=input_query_class.dtype) - input_query_class = torch.where(mask & pad_gt_mask, new_label, input_query_class) - - # if label_noise_ratio > 0: - # input_query_class = input_query_class.flatten() - # pad_gt_mask = pad_gt_mask.flatten() - # # half of bbox prob - # # mask = torch.rand(input_query_class.shape, device=device) < (label_noise_ratio * 0.5) - # mask = torch.rand_like(input_query_class) < (label_noise_ratio * 0.5) - # chosen_idx = torch.nonzero(mask * pad_gt_mask).squeeze(-1) - # # randomly put a new one here - # new_label = torch.randint_like(chosen_idx, 0, num_classes, dtype=input_query_class.dtype) - # # input_query_class.scatter_(dim=0, index=chosen_idx, value=new_label) - # input_query_class[chosen_idx] = new_label - # input_query_class = input_query_class.reshape(bs, num_denoising) - # pad_gt_mask = pad_gt_mask.reshape(bs, num_denoising) - - if box_noise_scale > 0: - known_bbox = box_cxcywh_to_xyxy(input_query_bbox) - diff = torch.tile(input_query_bbox[..., 2:] * 0.5, [1, 1, 2]) * box_noise_scale - rand_sign = torch.randint_like(input_query_bbox, 0, 2) * 2.0 - 1.0 - rand_part = torch.rand_like(input_query_bbox) - rand_part = (rand_part + 1.0) * negative_gt_mask + rand_part * (1 - negative_gt_mask) - rand_part *= rand_sign - known_bbox += rand_part * diff - known_bbox.clip_(min=0.0, max=1.0) - input_query_bbox = box_xyxy_to_cxcywh(known_bbox) - input_query_bbox = inverse_sigmoid(input_query_bbox) - - # class_embed = torch.concat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=device)]) - # input_query_class = torch.gather( - # class_embed, input_query_class.flatten(), - # axis=0).reshape(bs, num_denoising, -1) - # input_query_class = class_embed(input_query_class.flatten()).reshape(bs, num_denoising, -1) - input_query_class = class_embed(input_query_class) - - tgt_size = num_denoising + num_queries - # attn_mask = torch.ones([tgt_size, tgt_size], device=device) < 0 - attn_mask = torch.full([tgt_size, tgt_size], False, dtype=torch.bool, device=device) - # match query cannot see the reconstruction - attn_mask[num_denoising:, :num_denoising] = True - - # reconstruct cannot see each other - for i in range(num_group): - if i == 0: - attn_mask[max_gt_num * 2 * i: max_gt_num * 2 * (i + 1), max_gt_num * 2 * (i + 1): num_denoising] = True - if i == num_group - 1: - attn_mask[max_gt_num * 2 * i: max_gt_num * 2 * (i + 1), :max_gt_num * i * 2] = True - else: - attn_mask[max_gt_num * 2 * i: max_gt_num * 2 * (i + 1), max_gt_num * 2 * (i + 1): num_denoising] = True - attn_mask[max_gt_num * 2 * i: max_gt_num * 2 * (i + 1), 
:max_gt_num * 2 * i] = True - - dn_meta = { - "dn_positive_idx": dn_positive_idx, - "dn_num_group": num_group, - "dn_num_split": [num_denoising, num_queries] - } - - # print(input_query_class.shape) # torch.Size([4, 196, 256]) - # print(input_query_bbox.shape) # torch.Size([4, 196, 4]) - # print(attn_mask.shape) # torch.Size([496, 496]) - - return input_query_class, input_query_bbox, attn_mask, dn_meta diff --git a/src2/zoo/rtdetr/hybrid_encoder.py b/src2/zoo/rtdetr/hybrid_encoder.py deleted file mode 100644 index 804db69c120bc41c8e9c1b9f81e436c87323609f..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/hybrid_encoder.py +++ /dev/null @@ -1,322 +0,0 @@ -'''by lyuwenyu -''' - -import copy -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .utils import get_activation - -from src.core import register - - -__all__ = ['HybridEncoder'] - - - -class ConvNormLayer(nn.Module): - def __init__(self, ch_in, ch_out, kernel_size, stride, padding=None, bias=False, act=None): - super().__init__() - self.conv = nn.Conv2d( - ch_in, - ch_out, - kernel_size, - stride, - padding=(kernel_size-1)//2 if padding is None else padding, - bias=bias) - self.norm = nn.BatchNorm2d(ch_out) - self.act = nn.Identity() if act is None else get_activation(act) - - def forward(self, x): - return self.act(self.norm(self.conv(x))) - - -class RepVggBlock(nn.Module): - def __init__(self, ch_in, ch_out, act='relu'): - super().__init__() - self.ch_in = ch_in - self.ch_out = ch_out - self.conv1 = ConvNormLayer(ch_in, ch_out, 3, 1, padding=1, act=None) - self.conv2 = ConvNormLayer(ch_in, ch_out, 1, 1, padding=0, act=None) - self.act = nn.Identity() if act is None else get_activation(act) - - def forward(self, x): - if hasattr(self, 'conv'): - y = self.conv(x) - else: - y = self.conv1(x) + self.conv2(x) - - return self.act(y) - - def convert_to_deploy(self): - if not hasattr(self, 'conv'): - self.conv = nn.Conv2d(self.ch_in, self.ch_out, 3, 1, padding=1) - - kernel, bias = self.get_equivalent_kernel_bias() - self.conv.weight.data = kernel - self.conv.bias.data = bias - # self.__delattr__('conv1') - # self.__delattr__('conv2') - - def get_equivalent_kernel_bias(self): - kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1) - kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2) - - return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1), bias3x3 + bias1x1 - - def _pad_1x1_to_3x3_tensor(self, kernel1x1): - if kernel1x1 is None: - return 0 - else: - return F.pad(kernel1x1, [1, 1, 1, 1]) - - def _fuse_bn_tensor(self, branch: ConvNormLayer): - if branch is None: - return 0, 0 - kernel = branch.conv.weight - running_mean = branch.norm.running_mean - running_var = branch.norm.running_var - gamma = branch.norm.weight - beta = branch.norm.bias - eps = branch.norm.eps - std = (running_var + eps).sqrt() - t = (gamma / std).reshape(-1, 1, 1, 1) - return kernel * t, beta - running_mean * gamma / std - - -class CSPRepLayer(nn.Module): - def __init__(self, - in_channels, - out_channels, - num_blocks=3, - expansion=1.0, - bias=None, - act="silu"): - super(CSPRepLayer, self).__init__() - hidden_channels = int(out_channels * expansion) - self.conv1 = ConvNormLayer(in_channels, hidden_channels, 1, 1, bias=bias, act=act) - self.conv2 = ConvNormLayer(in_channels, hidden_channels, 1, 1, bias=bias, act=act) - self.bottlenecks = nn.Sequential(*[ - RepVggBlock(hidden_channels, hidden_channels, act=act) for _ in range(num_blocks) - ]) - if hidden_channels != out_channels: - self.conv3 = ConvNormLayer(hidden_channels, 
out_channels, 1, 1, bias=bias, act=act) - else: - self.conv3 = nn.Identity() - - def forward(self, x): - x_1 = self.conv1(x) - x_1 = self.bottlenecks(x_1) - x_2 = self.conv2(x) - return self.conv3(x_1 + x_2) - - -# transformer -class TransformerEncoderLayer(nn.Module): - def __init__(self, - d_model, - nhead, - dim_feedforward=2048, - dropout=0.1, - activation="relu", - normalize_before=False): - super().__init__() - self.normalize_before = normalize_before - - self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout, batch_first=True) - - self.linear1 = nn.Linear(d_model, dim_feedforward) - self.dropout = nn.Dropout(dropout) - self.linear2 = nn.Linear(dim_feedforward, d_model) - - self.norm1 = nn.LayerNorm(d_model) - self.norm2 = nn.LayerNorm(d_model) - self.dropout1 = nn.Dropout(dropout) - self.dropout2 = nn.Dropout(dropout) - self.activation = get_activation(activation) - - @staticmethod - def with_pos_embed(tensor, pos_embed): - return tensor if pos_embed is None else tensor + pos_embed - - def forward(self, src, src_mask=None, pos_embed=None) -> torch.Tensor: - residual = src - if self.normalize_before: - src = self.norm1(src) - q = k = self.with_pos_embed(src, pos_embed) - src, _ = self.self_attn(q, k, value=src, attn_mask=src_mask) - - src = residual + self.dropout1(src) - if not self.normalize_before: - src = self.norm1(src) - - residual = src - if self.normalize_before: - src = self.norm2(src) - src = self.linear2(self.dropout(self.activation(self.linear1(src)))) - src = residual + self.dropout2(src) - if not self.normalize_before: - src = self.norm2(src) - return src - - -class TransformerEncoder(nn.Module): - def __init__(self, encoder_layer, num_layers, norm=None): - super(TransformerEncoder, self).__init__() - self.layers = nn.ModuleList([copy.deepcopy(encoder_layer) for _ in range(num_layers)]) - self.num_layers = num_layers - self.norm = norm - - def forward(self, src, src_mask=None, pos_embed=None) -> torch.Tensor: - output = src - for layer in self.layers: - output = layer(output, src_mask=src_mask, pos_embed=pos_embed) - - if self.norm is not None: - output = self.norm(output) - - return output - - -@register -class HybridEncoder(nn.Module): - def __init__(self, - in_channels=[512, 1024, 2048], - feat_strides=[8, 16, 32], - hidden_dim=256, - nhead=8, - dim_feedforward = 1024, - dropout=0.0, - enc_act='gelu', - use_encoder_idx=[2], - num_encoder_layers=1, - pe_temperature=10000, - expansion=1.0, - depth_mult=1.0, - act='silu', - eval_spatial_size=None): - super().__init__() - self.in_channels = in_channels - self.feat_strides = feat_strides - self.hidden_dim = hidden_dim - self.use_encoder_idx = use_encoder_idx - self.num_encoder_layers = num_encoder_layers - self.pe_temperature = pe_temperature - self.eval_spatial_size = eval_spatial_size - - self.out_channels = [hidden_dim for _ in range(len(in_channels))] - self.out_strides = feat_strides - - # channel projection - self.input_proj = nn.ModuleList() - for in_channel in in_channels: - self.input_proj.append( - nn.Sequential( - nn.Conv2d(in_channel, hidden_dim, kernel_size=1, bias=False), - nn.BatchNorm2d(hidden_dim) - ) - ) - - # encoder transformer - encoder_layer = TransformerEncoderLayer( - hidden_dim, - nhead=nhead, - dim_feedforward=dim_feedforward, - dropout=dropout, - activation=enc_act) - - self.encoder = nn.ModuleList([ - TransformerEncoder(copy.deepcopy(encoder_layer), num_encoder_layers) for _ in range(len(use_encoder_idx)) - ]) - - # top-down fpn - self.lateral_convs = nn.ModuleList() - self.fpn_blocks = 
nn.ModuleList() - for _ in range(len(in_channels) - 1, 0, -1): - self.lateral_convs.append(ConvNormLayer(hidden_dim, hidden_dim, 1, 1, act=act)) - self.fpn_blocks.append( - CSPRepLayer(hidden_dim * 2, hidden_dim, round(3 * depth_mult), act=act, expansion=expansion) - ) - - # bottom-up pan - self.downsample_convs = nn.ModuleList() - self.pan_blocks = nn.ModuleList() - for _ in range(len(in_channels) - 1): - self.downsample_convs.append( - ConvNormLayer(hidden_dim, hidden_dim, 3, 2, act=act) - ) - self.pan_blocks.append( - CSPRepLayer(hidden_dim * 2, hidden_dim, round(3 * depth_mult), act=act, expansion=expansion) - ) - - self._reset_parameters() - - def _reset_parameters(self): - if self.eval_spatial_size: - for idx in self.use_encoder_idx: - stride = self.feat_strides[idx] - pos_embed = self.build_2d_sincos_position_embedding( - self.eval_spatial_size[1] // stride, self.eval_spatial_size[0] // stride, - self.hidden_dim, self.pe_temperature) - setattr(self, f'pos_embed{idx}', pos_embed) - # self.register_buffer(f'pos_embed{idx}', pos_embed) - - @staticmethod - def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.): - ''' - ''' - grid_w = torch.arange(int(w), dtype=torch.float32) - grid_h = torch.arange(int(h), dtype=torch.float32) - grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij') - assert embed_dim % 4 == 0, \ - 'Embed dimension must be divisible by 4 for 2D sin-cos position embedding' - pos_dim = embed_dim // 4 - omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim - omega = 1. / (temperature ** omega) - - out_w = grid_w.flatten()[..., None] @ omega[None] - out_h = grid_h.flatten()[..., None] @ omega[None] - - return torch.concat([out_w.sin(), out_w.cos(), out_h.sin(), out_h.cos()], dim=1)[None, :, :] - - def forward(self, feats): - assert len(feats) == len(self.in_channels) - proj_feats = [self.input_proj[i](feat) for i, feat in enumerate(feats)] - - # encoder - if self.num_encoder_layers > 0: - for i, enc_ind in enumerate(self.use_encoder_idx): - h, w = proj_feats[enc_ind].shape[2:] - # flatten [B, C, H, W] to [B, HxW, C] - src_flatten = proj_feats[enc_ind].flatten(2).permute(0, 2, 1) - if self.training or self.eval_spatial_size is None: - pos_embed = self.build_2d_sincos_position_embedding( - w, h, self.hidden_dim, self.pe_temperature).to(src_flatten.device) - else: - pos_embed = getattr(self, f'pos_embed{enc_ind}', None).to(src_flatten.device) - - memory = self.encoder[i](src_flatten, pos_embed=pos_embed) - proj_feats[enc_ind] = memory.permute(0, 2, 1).reshape(-1, self.hidden_dim, h, w).contiguous() - # print([x.is_contiguous() for x in proj_feats ]) - - # broadcasting and fusion - inner_outs = [proj_feats[-1]] - for idx in range(len(self.in_channels) - 1, 0, -1): - feat_high = inner_outs[0] - feat_low = proj_feats[idx - 1] - feat_high = self.lateral_convs[len(self.in_channels) - 1 - idx](feat_high) - inner_outs[0] = feat_high - upsample_feat = F.interpolate(feat_high, scale_factor=2., mode='nearest') - inner_out = self.fpn_blocks[len(self.in_channels)-1-idx](torch.concat([upsample_feat, feat_low], dim=1)) - inner_outs.insert(0, inner_out) - - outs = [inner_outs[0]] - for idx in range(len(self.in_channels) - 1): - feat_low = outs[-1] - feat_high = inner_outs[idx + 1] - downsample_feat = self.downsample_convs[idx](feat_low) - out = self.pan_blocks[idx](torch.concat([downsample_feat, feat_high], dim=1)) - outs.append(out) - - return outs diff --git a/src2/zoo/rtdetr/matcher.py b/src2/zoo/rtdetr/matcher.py deleted file mode 100644 index 
cf9dec1f8e030258f74d3198423186325d5f3201..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/matcher.py +++ /dev/null @@ -1,108 +0,0 @@ -""" -Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -Modules to compute the matching cost and solve the corresponding LSAP. - -by lyuwenyu -""" - -import torch -import torch.nn.functional as F - -from scipy.optimize import linear_sum_assignment -from torch import nn - -from .box_ops import box_cxcywh_to_xyxy, generalized_box_iou - -from src.core import register - - -@register -class HungarianMatcher(nn.Module): - """This class computes an assignment between the targets and the predictions of the network - - For efficiency reasons, the targets don't include the no_object. Because of this, in general, - there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, - while the others are un-matched (and thus treated as non-objects). - """ - - __share__ = ['use_focal_loss', ] - - def __init__(self, weight_dict, use_focal_loss=False, alpha=0.25, gamma=2.0): - """Creates the matcher - - Params: - cost_class: This is the relative weight of the classification error in the matching cost - cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost - cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost - """ - super().__init__() - self.cost_class = weight_dict['cost_class'] - self.cost_bbox = weight_dict['cost_bbox'] - self.cost_giou = weight_dict['cost_giou'] - - self.use_focal_loss = use_focal_loss - self.alpha = alpha - self.gamma = gamma - - assert self.cost_class != 0 or self.cost_bbox != 0 or self.cost_giou != 0, "all costs cant be 0" - - @torch.no_grad() - def forward(self, outputs, targets): - """ Performs the matching - - Params: - outputs: This is a dict that contains at least these entries: - "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits - "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates - - targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: - "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth - objects in the target) containing the class labels - "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates - - Returns: - A list of size batch_size, containing tuples of (index_i, index_j) where: - - index_i is the indices of the selected predictions (in order) - - index_j is the indices of the corresponding selected targets (in order) - For each batch element, it holds: - len(index_i) = len(index_j) = min(num_queries, num_target_boxes) - """ - bs, num_queries = outputs["pred_logits"].shape[:2] - - # We flatten to compute the cost matrices in a batch - if self.use_focal_loss: - out_prob = F.sigmoid(outputs["pred_logits"].flatten(0, 1)) - else: - out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes] - - out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] - - # Also concat the target labels and boxes - tgt_ids = torch.cat([v["labels"] for v in targets]) - tgt_bbox = torch.cat([v["boxes"] for v in targets]) - - # Compute the classification cost. Contrary to the loss, we don't use the NLL, - # but approximate it in 1 - proba[target class]. 
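Editor's aside before the cost terms are assembled below: once the classification, L1, and GIoU costs are summed into C, the per-image assignment is just scipy's Hungarian solver on a slice of that matrix. A toy example with invented numbers:

```python
import torch
from scipy.optimize import linear_sum_assignment

# Toy cost matrix for one image: 3 queries x 2 targets.
C = torch.tensor([[0.9, 0.1],
                  [0.4, 0.6],
                  [0.2, 0.8]])
row, col = linear_sum_assignment(C.numpy())
# Each query and each target is used at most once; total cost is minimised.
print(list(zip(row.tolist(), col.tolist())))  # [(0, 1), (2, 0)]
```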
- # The 1 is a constant that doesn't change the matching, it can be ommitted. - if self.use_focal_loss: - out_prob = out_prob[:, tgt_ids] - neg_cost_class = (1 - self.alpha) * (out_prob**self.gamma) * (-(1 - out_prob + 1e-8).log()) - pos_cost_class = self.alpha * ((1 - out_prob)**self.gamma) * (-(out_prob + 1e-8).log()) - cost_class = pos_cost_class - neg_cost_class - else: - cost_class = -out_prob[:, tgt_ids] - - # Compute the L1 cost between boxes - cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) - - # Compute the giou cost betwen boxes - cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)) - - # Final cost matrix - C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou - C = C.view(bs, num_queries, -1).cpu() - - sizes = [len(v["boxes"]) for v in targets] - indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] - - return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] diff --git a/src2/zoo/rtdetr/rtdetr.py b/src2/zoo/rtdetr/rtdetr.py deleted file mode 100644 index 851d4f74bc58d38135499a94427dc707f1726013..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/rtdetr.py +++ /dev/null @@ -1,44 +0,0 @@ -"""by lyuwenyu -""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - -import random -import numpy as np - -from src.core import register - - -__all__ = ['RTDETR', ] - - -@register -class RTDETR(nn.Module): - __inject__ = ['backbone', 'encoder', 'decoder', ] - - def __init__(self, backbone: nn.Module, encoder, decoder, multi_scale=None): - super().__init__() - self.backbone = backbone - self.decoder = decoder - self.encoder = encoder - self.multi_scale = multi_scale - - def forward(self, x, targets=None): - if self.multi_scale and self.training: - sz = np.random.choice(self.multi_scale) - x = F.interpolate(x, size=[sz, sz]) - - x = self.backbone(x) - x = self.encoder(x) - x = self.decoder(x, targets) - - return x - - def deploy(self, ): - self.eval() - for m in self.modules(): - if hasattr(m, 'convert_to_deploy'): - m.convert_to_deploy() - return self diff --git a/src2/zoo/rtdetr/rtdetr_criterion.py b/src2/zoo/rtdetr/rtdetr_criterion.py deleted file mode 100644 index 3ce77c0f160a5f2e6d6cfa1b94f943193d022306..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/rtdetr_criterion.py +++ /dev/null @@ -1,341 +0,0 @@ -""" -reference: -https://github.com/facebookresearch/detr/blob/main/models/detr.py - -by lyuwenyu -""" - - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchvision - -# from torchvision.ops import box_convert, generalized_box_iou -from .box_ops import box_cxcywh_to_xyxy, box_iou, generalized_box_iou - -from src.misc.dist import get_world_size, is_dist_available_and_initialized -from src.core import register - - - -@register -class SetCriterion(nn.Module): - """ This class computes the loss for DETR. - The process happens in two steps: - 1) we compute hungarian assignment between ground truth boxes and the outputs of the model - 2) we supervise each pair of matched ground-truth / prediction (supervise class and box) - """ - __share__ = ['num_classes', ] - __inject__ = ['matcher', ] - - def __init__(self, matcher, weight_dict, losses, alpha=0.2, gamma=2.0, eos_coef=1e-4, num_classes=80): - """ Create the criterion. 
- Parameters: - num_classes: number of object categories, omitting the special no-object category - matcher: module able to compute a matching between targets and proposals - weight_dict: dict containing as key the names of the losses and as values their relative weight. - eos_coef: relative classification weight applied to the no-object category - losses: list of all the losses to be applied. See get_loss for list of available losses. - """ - super().__init__() - self.num_classes = num_classes - self.matcher = matcher - self.weight_dict = weight_dict - self.losses = losses - - empty_weight = torch.ones(self.num_classes + 1) - empty_weight[-1] = eos_coef - self.register_buffer('empty_weight', empty_weight) - - self.alpha = alpha - self.gamma = gamma - - - def loss_labels(self, outputs, targets, indices, num_boxes, log=True): - """Classification loss (NLL) - targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] - """ - assert 'pred_logits' in outputs - src_logits = outputs['pred_logits'] - - idx = self._get_src_permutation_idx(indices) - target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) - target_classes = torch.full(src_logits.shape[:2], self.num_classes, - dtype=torch.int64, device=src_logits.device) - target_classes[idx] = target_classes_o - - loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight) - losses = {'loss_ce': loss_ce} - - if log: - # TODO this should probably be a separate loss, not hacked in this one here - losses['class_error'] = 100 - accuracy(src_logits[idx], target_classes_o)[0] - return losses - - def loss_labels_bce(self, outputs, targets, indices, num_boxes, log=True): - src_logits = outputs['pred_logits'] - idx = self._get_src_permutation_idx(indices) - target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) - target_classes = torch.full(src_logits.shape[:2], self.num_classes, - dtype=torch.int64, device=src_logits.device) - target_classes[idx] = target_classes_o - - target = F.one_hot(target_classes, num_classes=self.num_classes + 1)[..., :-1] - loss = F.binary_cross_entropy_with_logits(src_logits, target * 1., reduction='none') - loss = loss.mean(1).sum() * src_logits.shape[1] / num_boxes - return {'loss_bce': loss} - - def loss_labels_focal(self, outputs, targets, indices, num_boxes, log=True): - assert 'pred_logits' in outputs - src_logits = outputs['pred_logits'] - - idx = self._get_src_permutation_idx(indices) - target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) - target_classes = torch.full(src_logits.shape[:2], self.num_classes, - dtype=torch.int64, device=src_logits.device) - target_classes[idx] = target_classes_o - - target = F.one_hot(target_classes, num_classes=self.num_classes+1)[..., :-1] - # ce_loss = F.binary_cross_entropy_with_logits(src_logits, target * 1., reduction="none") - # prob = F.sigmoid(src_logits) # TODO .detach() - # p_t = prob * target + (1 - prob) * (1 - target) - # alpha_t = self.alpha * target + (1 - self.alpha) * (1 - target) - # loss = alpha_t * ce_loss * ((1 - p_t) ** self.gamma) - # loss = loss.mean(1).sum() * src_logits.shape[1] / num_boxes - loss = torchvision.ops.sigmoid_focal_loss(src_logits, target, self.alpha, self.gamma, reduction='none') - loss = loss.mean(1).sum() * src_logits.shape[1] / num_boxes - - return {'loss_focal': loss} - - def loss_labels_vfl(self, outputs, targets, indices, num_boxes, log=True): - assert 'pred_boxes' in outputs - idx = 
self._get_src_permutation_idx(indices) - - src_boxes = outputs['pred_boxes'][idx] - target_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0) - ious, _ = box_iou(box_cxcywh_to_xyxy(src_boxes), box_cxcywh_to_xyxy(target_boxes)) - ious = torch.diag(ious).detach() - - src_logits = outputs['pred_logits'] - target_classes_o = torch.cat([t["labels"][J] for t, (_, J) in zip(targets, indices)]) - target_classes = torch.full(src_logits.shape[:2], self.num_classes, - dtype=torch.int64, device=src_logits.device) - target_classes[idx] = target_classes_o - target = F.one_hot(target_classes, num_classes=self.num_classes + 1)[..., :-1] - - target_score_o = torch.zeros_like(target_classes, dtype=src_logits.dtype) - target_score_o[idx] = ious.to(target_score_o.dtype) - target_score = target_score_o.unsqueeze(-1) * target - - pred_score = F.sigmoid(src_logits).detach() - weight = self.alpha * pred_score.pow(self.gamma) * (1 - target) + target_score - - loss = F.binary_cross_entropy_with_logits(src_logits, target_score, weight=weight, reduction='none') - loss = loss.mean(1).sum() * src_logits.shape[1] / num_boxes - return {'loss_vfl': loss} - - @torch.no_grad() - def loss_cardinality(self, outputs, targets, indices, num_boxes): - """ Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes - This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients - """ - pred_logits = outputs['pred_logits'] - device = pred_logits.device - tgt_lengths = torch.as_tensor([len(v["labels"]) for v in targets], device=device) - # Count the number of predictions that are NOT "no-object" (which is the last class) - card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1) - card_err = F.l1_loss(card_pred.float(), tgt_lengths.float()) - losses = {'cardinality_error': card_err} - return losses - - def loss_boxes(self, outputs, targets, indices, num_boxes): - """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss - targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4] - The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size. - """ - assert 'pred_boxes' in outputs - idx = self._get_src_permutation_idx(indices) - src_boxes = outputs['pred_boxes'][idx] - target_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0) - - losses = {} - - loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none') - losses['loss_bbox'] = loss_bbox.sum() / num_boxes - - loss_giou = 1 - torch.diag(generalized_box_iou( - box_cxcywh_to_xyxy(src_boxes), - box_cxcywh_to_xyxy(target_boxes))) - losses['loss_giou'] = loss_giou.sum() / num_boxes - return losses - - def loss_masks(self, outputs, targets, indices, num_boxes): - """Compute the losses related to the masks: the focal loss and the dice loss. 
- targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w] - """ - assert "pred_masks" in outputs - - src_idx = self._get_src_permutation_idx(indices) - tgt_idx = self._get_tgt_permutation_idx(indices) - src_masks = outputs["pred_masks"] - src_masks = src_masks[src_idx] - masks = [t["masks"] for t in targets] - # TODO use valid to mask invalid areas due to padding in loss - target_masks, valid = nested_tensor_from_tensor_list(masks).decompose() - target_masks = target_masks.to(src_masks) - target_masks = target_masks[tgt_idx] - - # upsample predictions to the target size - src_masks = interpolate(src_masks[:, None], size=target_masks.shape[-2:], - mode="bilinear", align_corners=False) - src_masks = src_masks[:, 0].flatten(1) - - target_masks = target_masks.flatten(1) - target_masks = target_masks.view(src_masks.shape) - losses = { - "loss_mask": sigmoid_focal_loss(src_masks, target_masks, num_boxes), - "loss_dice": dice_loss(src_masks, target_masks, num_boxes), - } - return losses - - def _get_src_permutation_idx(self, indices): - # permute predictions following indices - batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) - src_idx = torch.cat([src for (src, _) in indices]) - return batch_idx, src_idx - - def _get_tgt_permutation_idx(self, indices): - # permute targets following indices - batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)]) - tgt_idx = torch.cat([tgt for (_, tgt) in indices]) - return batch_idx, tgt_idx - - def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs): - loss_map = { - 'labels': self.loss_labels, - 'cardinality': self.loss_cardinality, - 'boxes': self.loss_boxes, - 'masks': self.loss_masks, - - 'bce': self.loss_labels_bce, - 'focal': self.loss_labels_focal, - 'vfl': self.loss_labels_vfl, - } - assert loss in loss_map, f'do you really want to compute {loss} loss?' - return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs) - - def forward(self, outputs, targets): - """ This performs the loss computation. - Parameters: - outputs: dict of tensors, see the output specification of the model for the format - targets: list of dicts, such that len(targets) == batch_size. - The expected keys in each dict depends on the losses applied, see each loss' doc - """ - outputs_without_aux = {k: v for k, v in outputs.items() if 'aux' not in k} - - # Retrieve the matching between the outputs of the last layer and the targets - indices = self.matcher(outputs_without_aux, targets) - - # Compute the average number of target boxes accross all nodes, for normalization purposes - num_boxes = sum(len(t["labels"]) for t in targets) - num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device) - if is_dist_available_and_initialized(): - torch.distributed.all_reduce(num_boxes) - num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() - - # Compute all the requested losses - losses = {} - for loss in self.losses: - l_dict = self.get_loss(loss, outputs, targets, indices, num_boxes) - l_dict = {k: l_dict[k] * self.weight_dict[k] for k in l_dict if k in self.weight_dict} - losses.update(l_dict) - - # In case of auxiliary losses, we repeat this process with the output of each intermediate layer. 
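Editor's sketch (values invented) of the weighting step used in `forward` below: each loss dict is filtered and scaled by `weight_dict`, and the scaled terms are what training sums into a scalar objective.

```python
import torch

# Hypothetical weights; the keys must match what the loss_* methods emit.
weight_dict = {'loss_vfl': 1.0, 'loss_bbox': 5.0, 'loss_giou': 2.0}
l_dict = {'loss_bbox': torch.tensor(0.12), 'loss_giou': torch.tensor(0.30)}

scaled = {k: v * weight_dict[k] for k, v in l_dict.items() if k in weight_dict}
total = sum(scaled.values())   # tensor(1.2000) = 0.12 * 5 + 0.30 * 2
```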
- if 'aux_outputs' in outputs: - for i, aux_outputs in enumerate(outputs['aux_outputs']): - indices = self.matcher(aux_outputs, targets) - for loss in self.losses: - if loss == 'masks': - # Intermediate masks losses are too costly to compute, we ignore them. - continue - kwargs = {} - if loss == 'labels': - # Logging is enabled only for the last layer - kwargs = {'log': False} - - l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs) - l_dict = {k: l_dict[k] * self.weight_dict[k] for k in l_dict if k in self.weight_dict} - l_dict = {k + f'_aux_{i}': v for k, v in l_dict.items()} - losses.update(l_dict) - - # In case of cdn auxiliary losses. For rtdetr - if 'dn_aux_outputs' in outputs: - assert 'dn_meta' in outputs, '' - indices = self.get_cdn_matched_indices(outputs['dn_meta'], targets) - num_boxes = num_boxes * outputs['dn_meta']['dn_num_group'] - - for i, aux_outputs in enumerate(outputs['dn_aux_outputs']): - # indices = self.matcher(aux_outputs, targets) - for loss in self.losses: - if loss == 'masks': - # Intermediate masks losses are too costly to compute, we ignore them. - continue - kwargs = {} - if loss == 'labels': - # Logging is enabled only for the last layer - kwargs = {'log': False} - - l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs) - l_dict = {k: l_dict[k] * self.weight_dict[k] for k in l_dict if k in self.weight_dict} - l_dict = {k + f'_dn_{i}': v for k, v in l_dict.items()} - losses.update(l_dict) - - return losses - - @staticmethod - def get_cdn_matched_indices(dn_meta, targets): - '''get_cdn_matched_indices - ''' - dn_positive_idx, dn_num_group = dn_meta["dn_positive_idx"], dn_meta["dn_num_group"] - num_gts = [len(t['labels']) for t in targets] - device = targets[0]['labels'].device - - dn_match_indices = [] - for i, num_gt in enumerate(num_gts): - if num_gt > 0: - gt_idx = torch.arange(num_gt, dtype=torch.int64, device=device) - gt_idx = gt_idx.tile(dn_num_group) - assert len(dn_positive_idx[i]) == len(gt_idx) - dn_match_indices.append((dn_positive_idx[i], gt_idx)) - else: - dn_match_indices.append((torch.zeros(0, dtype=torch.int64, device=device), \ - torch.zeros(0, dtype=torch.int64, device=device))) - - return dn_match_indices - - - - - -@torch.no_grad() -def accuracy(output, target, topk=(1,)): - """Computes the precision@k for the specified values of k""" - if target.numel() == 0: - return [torch.zeros([], device=output.device)] - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].view(-1).float().sum(0) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - - - diff --git a/src2/zoo/rtdetr/rtdetr_decoder.py b/src2/zoo/rtdetr/rtdetr_decoder.py deleted file mode 100644 index 300b50ca3a73d56451be43e58d7f872ec101b314..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/rtdetr_decoder.py +++ /dev/null @@ -1,627 +0,0 @@ -"""by lyuwenyu -""" - -import math -import copy -from collections import OrderedDict -from typing import Optional, Tuple - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.nn.init as init -from torch.nn.init import constant_, xavier_normal_, xavier_uniform_ -from torch.nn.parameter import Parameter - -from .denoising import get_contrastive_denoising_training_group -from .utils import deformable_attention_core_func, get_activation, inverse_sigmoid -from .utils import 
bias_init_with_prob -from torch.nn.modules.linear import NonDynamicallyQuantizableLinear - -from src.core import register - -import numpy as np - -import scipy.linalg as sl - -__all__ = ['RTDETRTransformer'] - - - -class MLP(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act='relu'): - super().__init__() - self.num_layers = num_layers - h = [hidden_dim] * (num_layers - 1) - self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) - self.act = nn.Identity() if act is None else get_activation(act) - - def forward(self, x): - for i, layer in enumerate(self.layers): - x = self.act(layer(x)) if i < self.num_layers - 1 else layer(x) - return x - - -class CoPE(nn.Module): - def __init__(self,npos_max,head_dim): - super(CoPE, self).__init__() - self.npos_max = npos_max #? - self.pos_emb = nn.parameter.Parameter(torch.zeros(1,head_dim,npos_max)) - - def forward(self,query,attn_logits): - #compute positions - gates = torch.sigmoid(attn_logits) #sig(qk) - pos = gates.flip(-1).cumsum(dim=-1).flip(-1) - pos = pos.clamp(max=self.npos_max-1) - #interpolate from integer positions - pos_ceil = pos.ceil().long() - pos_floor = pos.floor().long() - logits_int = torch.matmul(query,self.pos_emb) - logits_ceil = logits_int.gather(-1,pos_ceil) - logits_floor = logits_int.gather(-1,pos_floor) - w = pos-pos_floor - return logits_ceil*w+logits_floor*(1-w) - - - - -class MSDeformableAttention(nn.Module): - def __init__(self, embed_dim=256, num_heads=8, num_levels=4, num_points=4,): - """ - Multi-Scale Deformable Attention Module - """ - super(MSDeformableAttention, self).__init__() - self.embed_dim = embed_dim - self.num_heads = num_heads - self.num_levels = num_levels - self.num_points = num_points - self.total_points = num_heads * num_levels * num_points - - self.head_dim = embed_dim // num_heads - assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" - - self.sampling_offsets = nn.Linear(embed_dim, self.total_points * 2,) - self.attention_weights = nn.Linear(embed_dim, self.total_points) - self.value_proj = nn.Linear(embed_dim, embed_dim) - self.output_proj = nn.Linear(embed_dim, embed_dim) - - self.ms_deformable_attn_core = deformable_attention_core_func - - self._reset_parameters() - - - def _reset_parameters(self): - # sampling_offsets - init.constant_(self.sampling_offsets.weight, 0) - thetas = torch.arange(self.num_heads, dtype=torch.float32) * (2.0 * math.pi / self.num_heads) - grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) - grid_init = grid_init / grid_init.abs().max(-1, keepdim=True).values - grid_init = grid_init.reshape(self.num_heads, 1, 1, 2).tile([1, self.num_levels, self.num_points, 1]) - scaling = torch.arange(1, self.num_points + 1, dtype=torch.float32).reshape(1, 1, -1, 1) - grid_init *= scaling - self.sampling_offsets.bias.data[...] 
= grid_init.flatten() - - # attention_weights - init.constant_(self.attention_weights.weight, 0) - init.constant_(self.attention_weights.bias, 0) - - # proj - init.xavier_uniform_(self.value_proj.weight) - init.constant_(self.value_proj.bias, 0) - init.xavier_uniform_(self.output_proj.weight) - init.constant_(self.output_proj.bias, 0) - - - def forward(self, - query, - reference_points, - value, - value_spatial_shapes, - value_mask=None): - """ - Args: - query (Tensor): [bs, query_length, C] - reference_points (Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0), - bottom-right (1, 1), including padding area - value (Tensor): [bs, value_length, C] - value_spatial_shapes (List): [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] - value_level_start_index (List): [n_levels], [0, H_0*W_0, H_0*W_0+H_1*W_1, ...] - value_mask (Tensor): [bs, value_length], True for non-padding elements, False for padding elements - - Returns: - output (Tensor): [bs, Length_{query}, C] - """ - bs, Len_q = query.shape[:2] - Len_v = value.shape[1] - - value = self.value_proj(value) - if value_mask is not None: - value_mask = value_mask.astype(value.dtype).unsqueeze(-1) - value *= value_mask - value = value.reshape(bs, Len_v, self.num_heads, self.head_dim) - - sampling_offsets = self.sampling_offsets(query).reshape( - bs, Len_q, self.num_heads, self.num_levels, self.num_points, 2) - attention_weights = self.attention_weights(query).reshape( - bs, Len_q, self.num_heads, self.num_levels * self.num_points) - attention_weights = F.softmax(attention_weights, dim=-1).reshape( - bs, Len_q, self.num_heads, self.num_levels, self.num_points) - - if reference_points.shape[-1] == 2: - offset_normalizer = torch.tensor(value_spatial_shapes) - offset_normalizer = offset_normalizer.flip([1]).reshape( - 1, 1, 1, self.num_levels, 1, 2) - sampling_locations = reference_points.reshape( - bs, Len_q, 1, self.num_levels, 1, 2 - ) + sampling_offsets / offset_normalizer - elif reference_points.shape[-1] == 4: - sampling_locations = ( - reference_points[:, :, None, :, None, :2] + sampling_offsets / - self.num_points * reference_points[:, :, None, :, None, 2:] * 0.5) - else: - raise ValueError( - "Last dim of reference_points must be 2 or 4, but get {} instead.". 
- format(reference_points.shape[-1])) - - output = self.ms_deformable_attn_core(value, value_spatial_shapes, sampling_locations, attention_weights) - - output = self.output_proj(output) - - return output - - -class TransformerDecoderLayer(nn.Module): - def __init__(self, - d_model=256, - n_head=8, - dim_feedforward=1024, - dropout=0., - activation="relu", - n_levels=4, - n_points=4,): - super(TransformerDecoderLayer, self).__init__() - - # self attention - self.self_attn = nn.MultiheadAttention(d_model, n_head, dropout=dropout, batch_first=True) - self.dropout1 = nn.Dropout(dropout) - self.norm1 = nn.LayerNorm(d_model) - - # cross attention - self.cross_attn = MSDeformableAttention(d_model, n_head, n_levels, n_points) - self.dropout2 = nn.Dropout(dropout) - self.norm2 = nn.LayerNorm(d_model) - - # ffn - self.linear1 = nn.Linear(d_model, dim_feedforward) - self.activation = getattr(F, activation) - self.dropout3 = nn.Dropout(dropout) - self.linear2 = nn.Linear(dim_feedforward, d_model) - self.dropout4 = nn.Dropout(dropout) - self.norm3 = nn.LayerNorm(d_model) - - self.cope = CoPE(12,d_model) - - # self._reset_parameters() - - # def _reset_parameters(self): - # linear_init_(self.linear1) - # linear_init_(self.linear2) - # xavier_uniform_(self.linear1.weight) - # xavier_uniform_(self.linear2.weight) - - def with_pos_embed(self, tensor, pos): - return tensor if pos is None else tensor + pos - - def forward_ffn(self, tgt): - return self.linear2(self.dropout3(self.activation(self.linear1(tgt)))) - - def forward(self, - tgt, - reference_points, - memory, - memory_spatial_shapes, - memory_level_start_index, - attn_mask=None, - memory_mask=None, - query_pos_embed=None): - # self attention - #print(query_pos_embed.shape) - qk = torch.bmm (tgt ,tgt.transpose(-1 ,-2)) - mask = torch.tril(torch.ones_like(qk),diagonal=0) - mask = torch.log(mask) - query_pos_embed = self.cope(tgt,qk+mask) #position_embedding - - - n_tgt = tgt.cpu().detach().numpy() - - itgt = tgt.new_tensor(np.array([sl.pinv(i) for i in n_tgt])) #inv_tgt - -# print('qk:',qk.shape) -# print('tgt:',tgt.shape) -# print((query_pos_embed@itgt.transpose(-1,-2)).shape) -# print('ik:',itgt.shape) - - # print(torch.round(itgt@tgt)) - # print(tgt@tgt.transpose(-1,-2)) - - k = tgt - q = tgt + (query_pos_embed@itgt.transpose(-1,-2)) - - # print((q@(k.transpose(-1,-2))-query_pos_embed)) - - # if attn_mask is not None: - # attn_mask = torch.where( - # attn_mask.to(torch.bool), - # torch.zeros_like(attn_mask), - # torch.full_like(attn_mask, float('-inf'), dtype=tgt.dtype)) - - # q = k = self.with_pos_embed(tgt, query_pos_embed) - tgt2, _ = self.self_attn(q, k, value=tgt, attn_mask=attn_mask) - tgt = tgt + self.dropout1(tgt2) - tgt = self.norm1(tgt) - - # cross attention - tgt2 = self.cross_attn(\ - self.with_pos_embed(tgt, (query_pos_embed@itgt.transpose(-1,-2))), #self.with_pos_embed(tgt, query_pos_embed), - reference_points, - memory, - memory_spatial_shapes, - memory_mask) - tgt = tgt + self.dropout2(tgt2) - tgt = self.norm2(tgt) - - # ffn - tgt2 = self.forward_ffn(tgt) - tgt = tgt + self.dropout4(tgt2) - tgt = self.norm3(tgt) - - return tgt - - -class TransformerDecoder(nn.Module): - def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1): - super(TransformerDecoder, self).__init__() - self.layers = nn.ModuleList([copy.deepcopy(decoder_layer) for _ in range(num_layers)]) - self.hidden_dim = hidden_dim - self.num_layers = num_layers - self.eval_idx = eval_idx if eval_idx >= 0 else num_layers + eval_idx - - def forward(self, - tgt, - 
ref_points_unact, - memory, - memory_spatial_shapes, - memory_level_start_index, - bbox_head, - score_head, - query_pos_head, - attn_mask=None, - memory_mask=None): - output = tgt - dec_out_bboxes = [] - dec_out_logits = [] - ref_points_detach = F.sigmoid(ref_points_unact) - - for i, layer in enumerate(self.layers): - ref_points_input = ref_points_detach.unsqueeze(2) - query_pos_embed = query_pos_head(ref_points_detach) - - output = layer(output, ref_points_input, memory, - memory_spatial_shapes, memory_level_start_index, - attn_mask, memory_mask, query_pos_embed) - - inter_ref_bbox = F.sigmoid(bbox_head[i](output) + inverse_sigmoid(ref_points_detach)) - - if self.training: - dec_out_logits.append(score_head[i](output)) - if i == 0: - dec_out_bboxes.append(inter_ref_bbox) - else: - dec_out_bboxes.append(F.sigmoid(bbox_head[i](output) + inverse_sigmoid(ref_points))) - - elif i == self.eval_idx: - dec_out_logits.append(score_head[i](output)) - dec_out_bboxes.append(inter_ref_bbox) - break - - ref_points = inter_ref_bbox - ref_points_detach = inter_ref_bbox.detach( - ) if self.training else inter_ref_bbox - - return torch.stack(dec_out_bboxes), torch.stack(dec_out_logits) - - -@register -class RTDETRTransformer(nn.Module): - __share__ = ['num_classes'] - def __init__(self, - num_classes=80, - hidden_dim=256, - num_queries=300, - position_embed_type='sine', - feat_channels=[512, 1024, 2048], - feat_strides=[8, 16, 32], - num_levels=3, - num_decoder_points=4, - nhead=8, - num_decoder_layers=6, - dim_feedforward=1024, - dropout=0., - activation="relu", - num_denoising=100, - label_noise_ratio=0.5, - box_noise_scale=1.0, - learnt_init_query=False, - eval_spatial_size=None, - eval_idx=-1, - eps=1e-2, - aux_loss=True): - - super(RTDETRTransformer, self).__init__() - assert position_embed_type in ['sine', 'learned'], \ - f'ValueError: position_embed_type not supported {position_embed_type}!' 
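Editor's sketch of the per-layer box refinement in `TransformerDecoder.forward` above: each decoder layer predicts a delta in logit space, which is added to the inverse-sigmoid of the previous reference boxes and squashed back to (0, 1).

```python
import torch

def inverse_sigmoid(x, eps=1e-5):
    x = x.clamp(min=eps, max=1 - eps)
    return torch.log(x / (1 - x))

ref = torch.rand(2, 300, 4)      # previous reference boxes, cxcywh in (0, 1)
delta = torch.zeros(2, 300, 4)   # stand-in for bbox_head[i](output)
new_ref = torch.sigmoid(delta + inverse_sigmoid(ref))

# A zero delta leaves the boxes unchanged (up to the eps clamp).
assert torch.allclose(new_ref, ref, atol=1e-4)
```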
- assert len(feat_channels) <= num_levels - assert len(feat_strides) == len(feat_channels) - for _ in range(num_levels - len(feat_strides)): - feat_strides.append(feat_strides[-1] * 2) - - self.hidden_dim = hidden_dim - self.nhead = nhead - self.feat_strides = feat_strides - self.num_levels = num_levels - self.num_classes = num_classes - self.num_queries = num_queries - self.eps = eps - self.num_decoder_layers = num_decoder_layers - self.eval_spatial_size = eval_spatial_size - self.aux_loss = aux_loss - - # backbone feature projection - self._build_input_proj_layer(feat_channels) - - # Transformer module - decoder_layer = TransformerDecoderLayer(hidden_dim, nhead, dim_feedforward, dropout, activation, num_levels, num_decoder_points) - self.decoder = TransformerDecoder(hidden_dim, decoder_layer, num_decoder_layers, eval_idx) - - self.num_denoising = num_denoising - self.label_noise_ratio = label_noise_ratio - self.box_noise_scale = box_noise_scale - # denoising part - if num_denoising > 0: - # self.denoising_class_embed = nn.Embedding(num_classes, hidden_dim, padding_idx=num_classes-1) # TODO for load paddle weights - self.denoising_class_embed = nn.Embedding(num_classes+1, hidden_dim, padding_idx=num_classes) - - # decoder embedding - self.learnt_init_query = learnt_init_query - if learnt_init_query: - self.tgt_embed = nn.Embedding(num_queries, hidden_dim) - self.query_pos_head = MLP(4, 2 * hidden_dim, hidden_dim, num_layers=2) - - # encoder head - self.enc_output = nn.Sequential( - nn.Linear(hidden_dim, hidden_dim), - nn.LayerNorm(hidden_dim,) - ) - self.enc_score_head = nn.Linear(hidden_dim, num_classes) - self.enc_bbox_head = MLP(hidden_dim, hidden_dim, 4, num_layers=3) - - # decoder head - self.dec_score_head = nn.ModuleList([ - nn.Linear(hidden_dim, num_classes) - for _ in range(num_decoder_layers) - ]) - self.dec_bbox_head = nn.ModuleList([ - MLP(hidden_dim, hidden_dim, 4, num_layers=3) - for _ in range(num_decoder_layers) - ]) - - # init encoder output anchors and valid_mask - if self.eval_spatial_size: - self.anchors, self.valid_mask = self._generate_anchors() - - self._reset_parameters() - - def _reset_parameters(self): - bias = bias_init_with_prob(0.01) - - init.constant_(self.enc_score_head.bias, bias) - init.constant_(self.enc_bbox_head.layers[-1].weight, 0) - init.constant_(self.enc_bbox_head.layers[-1].bias, 0) - - for cls_, reg_ in zip(self.dec_score_head, self.dec_bbox_head): - init.constant_(cls_.bias, bias) - init.constant_(reg_.layers[-1].weight, 0) - init.constant_(reg_.layers[-1].bias, 0) - - # linear_init_(self.enc_output[0]) - init.xavier_uniform_(self.enc_output[0].weight) - if self.learnt_init_query: - init.xavier_uniform_(self.tgt_embed.weight) - init.xavier_uniform_(self.query_pos_head.layers[0].weight) - init.xavier_uniform_(self.query_pos_head.layers[1].weight) - - - def _build_input_proj_layer(self, feat_channels): - self.input_proj = nn.ModuleList() - for in_channels in feat_channels: - self.input_proj.append( - nn.Sequential(OrderedDict([ - ('conv', nn.Conv2d(in_channels, self.hidden_dim, 1, bias=False)), - ('norm', nn.BatchNorm2d(self.hidden_dim,))]) - ) - ) - - in_channels = feat_channels[-1] - - for _ in range(self.num_levels - len(feat_channels)): - self.input_proj.append( - nn.Sequential(OrderedDict([ - ('conv', nn.Conv2d(in_channels, self.hidden_dim, 3, 2, padding=1, bias=False)), - ('norm', nn.BatchNorm2d(self.hidden_dim))]) - ) - ) - in_channels = self.hidden_dim - - def _get_encoder_input(self, feats): - # get projection features - proj_feats = 
[self.input_proj[i](feat) for i, feat in enumerate(feats)] - if self.num_levels > len(proj_feats): - len_srcs = len(proj_feats) - for i in range(len_srcs, self.num_levels): - if i == len_srcs: - proj_feats.append(self.input_proj[i](feats[-1])) - else: - proj_feats.append(self.input_proj[i](proj_feats[-1])) - - # get encoder inputs - feat_flatten = [] - spatial_shapes = [] - level_start_index = [0, ] - for i, feat in enumerate(proj_feats): - _, _, h, w = feat.shape - # [b, c, h, w] -> [b, h*w, c] - feat_flatten.append(feat.flatten(2).permute(0, 2, 1)) - # [num_levels, 2] - spatial_shapes.append([h, w]) - # [l], start index of each level - level_start_index.append(h * w + level_start_index[-1]) - - # [b, l, c] - feat_flatten = torch.concat(feat_flatten, 1) - level_start_index.pop() - return (feat_flatten, spatial_shapes, level_start_index) - - def _generate_anchors(self, - spatial_shapes=None, - grid_size=0.05, - dtype=torch.float32, - device='cpu'): - if spatial_shapes is None: - spatial_shapes = [[int(self.eval_spatial_size[0] / s), int(self.eval_spatial_size[1] / s)] - for s in self.feat_strides - ] - anchors = [] - for lvl, (h, w) in enumerate(spatial_shapes): - grid_y, grid_x = torch.meshgrid(\ - torch.arange(end=h, dtype=dtype), \ - torch.arange(end=w, dtype=dtype), indexing='ij') - grid_xy = torch.stack([grid_x, grid_y], -1) - valid_WH = torch.tensor([w, h]).to(dtype) - grid_xy = (grid_xy.unsqueeze(0) + 0.5) / valid_WH - wh = torch.ones_like(grid_xy) * grid_size * (2.0 ** lvl) - anchors.append(torch.concat([grid_xy, wh], -1).reshape(-1, h * w, 4)) - - anchors = torch.concat(anchors, 1).to(device) - valid_mask = ((anchors > self.eps) * (anchors < 1 - self.eps)).all(-1, keepdim=True) - anchors = torch.log(anchors / (1 - anchors)) - # anchors = torch.where(valid_mask, anchors, float('inf')) - # anchors[valid_mask] = torch.inf # valid_mask [1, 8400, 1] - anchors = torch.where(valid_mask, anchors, torch.inf) - - return anchors, valid_mask - - - def _get_decoder_input(self, - memory, - spatial_shapes, - denoising_class=None, - denoising_bbox_unact=None): - bs, _, _ = memory.shape - # prepare input for decoder - if self.training or self.eval_spatial_size is None: - anchors, valid_mask = self._generate_anchors(spatial_shapes, device=memory.device) - else: - anchors, valid_mask = self.anchors.to(memory.device), self.valid_mask.to(memory.device) - - # memory = torch.where(valid_mask, memory, 0) - memory = valid_mask.to(memory.dtype) * memory # TODO fix type error for onnx export - - output_memory = self.enc_output(memory) - - enc_outputs_class = self.enc_score_head(output_memory) - enc_outputs_coord_unact = self.enc_bbox_head(output_memory) + anchors - - _, topk_ind = torch.topk(enc_outputs_class.max(-1).values, self.num_queries, dim=1) - - reference_points_unact = enc_outputs_coord_unact.gather(dim=1, \ - index=topk_ind.unsqueeze(-1).repeat(1, 1, enc_outputs_coord_unact.shape[-1])) - - enc_topk_bboxes = F.sigmoid(reference_points_unact) - if denoising_bbox_unact is not None: - reference_points_unact = torch.concat( - [denoising_bbox_unact, reference_points_unact], 1) - - enc_topk_logits = enc_outputs_class.gather(dim=1, \ - index=topk_ind.unsqueeze(-1).repeat(1, 1, enc_outputs_class.shape[-1])) - - # extract region features - if self.learnt_init_query: - target = self.tgt_embed.weight.unsqueeze(0).tile([bs, 1, 1]) - else: - target = output_memory.gather(dim=1, \ - index=topk_ind.unsqueeze(-1).repeat(1, 1, output_memory.shape[-1])) - target = target.detach() - - if denoising_class is not None: 
- target = torch.concat([denoising_class, target], 1) - - return target, reference_points_unact.detach(), enc_topk_bboxes, enc_topk_logits - - - def forward(self, feats, targets=None): - - # input projection and embedding - (memory, spatial_shapes, level_start_index) = self._get_encoder_input(feats) - - # prepare denoising training - if self.training and self.num_denoising > 0: - denoising_class, denoising_bbox_unact, attn_mask, dn_meta = \ - get_contrastive_denoising_training_group(targets, \ - self.num_classes, - self.num_queries, - self.denoising_class_embed, - num_denoising=self.num_denoising, - label_noise_ratio=self.label_noise_ratio, - box_noise_scale=self.box_noise_scale, ) - else: - denoising_class, denoising_bbox_unact, attn_mask, dn_meta = None, None, None, None - - target, init_ref_points_unact, enc_topk_bboxes, enc_topk_logits = \ - self._get_decoder_input(memory, spatial_shapes, denoising_class, denoising_bbox_unact) - - # decoder - out_bboxes, out_logits = self.decoder( - target, - init_ref_points_unact, - memory, - spatial_shapes, - level_start_index, - self.dec_bbox_head, - self.dec_score_head, - self.query_pos_head, - attn_mask=attn_mask) - - if self.training and dn_meta is not None: - dn_out_bboxes, out_bboxes = torch.split(out_bboxes, dn_meta['dn_num_split'], dim=2) - dn_out_logits, out_logits = torch.split(out_logits, dn_meta['dn_num_split'], dim=2) - - out = {'pred_logits': out_logits[-1], 'pred_boxes': out_bboxes[-1]} - - if self.training and self.aux_loss: - out['aux_outputs'] = self._set_aux_loss(out_logits[:-1], out_bboxes[:-1]) - out['aux_outputs'].extend(self._set_aux_loss([enc_topk_logits], [enc_topk_bboxes])) - - if self.training and dn_meta is not None: - out['dn_aux_outputs'] = self._set_aux_loss(dn_out_logits, dn_out_bboxes) - out['dn_meta'] = dn_meta - - return out - - - @torch.jit.unused - def _set_aux_loss(self, outputs_class, outputs_coord): - # this is a workaround to make torchscript happy, as torchscript - # doesn't support dictionary with non-homogeneous values, such - # as a dict having both a Tensor and a list. 
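Editor's sketch of the `dn_num_split` bookkeeping in `forward` above: denoising queries and matching queries share one output tensor and are separated along the query dimension (the 196/300 sizes below are the illustrative values from the shape comments in `denoising.py`).

```python
import torch

n_layers, bs, num_denoising, num_queries = 6, 2, 196, 300
out_bboxes = torch.rand(n_layers, bs, num_denoising + num_queries, 4)

dn_out_bboxes, out = torch.split(out_bboxes, [num_denoising, num_queries], dim=2)
assert dn_out_bboxes.shape[2] == num_denoising and out.shape[2] == num_queries
```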
- return [{'pred_logits': a, 'pred_boxes': b} - for a, b in zip(outputs_class, outputs_coord)] diff --git a/src2/zoo/rtdetr/rtdetr_postprocessor.py b/src2/zoo/rtdetr/rtdetr_postprocessor.py deleted file mode 100644 index 344d69ac3b24f431a1c06cf222e2f6e540f36552..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/rtdetr_postprocessor.py +++ /dev/null @@ -1,80 +0,0 @@ -"""by lyuwenyu -""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - -import torchvision - -from src.core import register - - -__all__ = ['RTDETRPostProcessor'] - - -@register -class RTDETRPostProcessor(nn.Module): - __share__ = ['num_classes', 'use_focal_loss', 'num_top_queries', 'remap_mscoco_category'] - - def __init__(self, num_classes=80, use_focal_loss=True, num_top_queries=300, remap_mscoco_category=False) -> None: - super().__init__() - self.use_focal_loss = use_focal_loss - self.num_top_queries = num_top_queries - self.num_classes = num_classes - self.remap_mscoco_category = remap_mscoco_category - self.deploy_mode = False - - def extra_repr(self) -> str: - return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}' - - # def forward(self, outputs, orig_target_sizes): - def forward(self, outputs, orig_target_sizes): - - logits, boxes = outputs['pred_logits'], outputs['pred_boxes'] - # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0) - - bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy') - bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1) - - if self.use_focal_loss: - scores = F.sigmoid(logits) - scores, index = torch.topk(scores.flatten(1), self.num_top_queries, axis=-1) - labels = index % self.num_classes - index = index // self.num_classes - boxes = bbox_pred.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1])) - - else: - scores = F.softmax(logits)[:, :, :-1] - scores, labels = scores.max(dim=-1) - if scores.shape[1] > self.num_top_queries: - scores, index = torch.topk(scores, self.num_top_queries, dim=-1) - labels = torch.gather(labels, dim=1, index=index) - boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1])) - - # TODO for onnx export - if self.deploy_mode: - return labels, boxes, scores - - # TODO - if self.remap_mscoco_category: - from ...data.coco import mscoco_label2category - labels = torch.tensor([mscoco_label2category[int(x.item())] for x in labels.flatten()])\ - .to(boxes.device).reshape(labels.shape) - - results = [] - for lab, box, sco in zip(labels, boxes, scores): - result = dict(labels=lab, boxes=box, scores=sco) - results.append(result) - - return results - - - def deploy(self, ): - self.eval() - self.deploy_mode = True - return self - - @property - def iou_types(self, ): - return ('bbox', ) diff --git a/src2/zoo/rtdetr/utils.py b/src2/zoo/rtdetr/utils.py deleted file mode 100644 index 4f44cc52c2f0b8ebed01cbb7c49b2317954a7582..0000000000000000000000000000000000000000 --- a/src2/zoo/rtdetr/utils.py +++ /dev/null @@ -1,101 +0,0 @@ -"""by lyuwenyu -""" - -import math -import torch -import torch.nn as nn -import torch.nn.functional as F - - -def inverse_sigmoid(x: torch.Tensor, eps: float=1e-5) -> torch.Tensor: - x = x.clip(min=0., max=1.) 
- return torch.log(x.clip(min=eps) / (1 - x).clip(min=eps)) - - def deformable_attention_core_func(value, value_spatial_shapes, sampling_locations, attention_weights): - """ - Args: - value (Tensor): [bs, value_length, n_head, c] - value_spatial_shapes (Tensor|List): [n_levels, 2] - sampling_locations (Tensor): [bs, query_length, n_head, n_levels, n_points, 2] - attention_weights (Tensor): [bs, query_length, n_head, n_levels, n_points] - - Returns: - output (Tensor): [bs, query_length, C] - """ - bs, _, n_head, c = value.shape - _, Len_q, _, n_levels, n_points, _ = sampling_locations.shape - - split_shape = [h * w for h, w in value_spatial_shapes] - value_list = value.split(split_shape, dim=1) - # grid_sample expects sampling grids in [-1, 1] - sampling_grids = 2 * sampling_locations - 1 - sampling_value_list = [] - for level, (h, w) in enumerate(value_spatial_shapes): - # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ - value_l_ = value_list[level].flatten(2).permute( - 0, 2, 1).reshape(bs * n_head, c, h, w) - # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 - sampling_grid_l_ = sampling_grids[:, :, :, level].permute( - 0, 2, 1, 3, 4).flatten(0, 1) - # N_*M_, D_, Lq_, P_ - sampling_value_l_ = F.grid_sample( - value_l_, - sampling_grid_l_, - mode='bilinear', - padding_mode='zeros', - align_corners=False) - sampling_value_list.append(sampling_value_l_) - # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_) - attention_weights = attention_weights.permute(0, 2, 1, 3, 4).reshape( - bs * n_head, 1, Len_q, n_levels * n_points) - output = (torch.stack( - sampling_value_list, dim=-2).flatten(-2) * - attention_weights).sum(-1).reshape(bs, n_head * c, Len_q) - - return output.permute(0, 2, 1) - - - def bias_init_with_prob(prior_prob=0.01): - """Initialize a conv/fc bias so that a zero-weight layer predicts the given prior probability.""" - bias_init = float(-math.log((1 - prior_prob) / prior_prob)) - return bias_init - - - def get_activation(act, inplace: bool=True): - '''Get an activation module by name; pass an nn.Module through; return Identity for None. - ''' - if act is None: - return nn.Identity() - if isinstance(act, nn.Module): - m = act - else: - act = act.lower() - - if act == 'silu': - m = nn.SiLU() - - elif act == 'relu': - m = nn.ReLU() - - elif act == 'leaky_relu': - m = nn.LeakyReLU() - - elif act == 'gelu': - m = nn.GELU() - - else: - raise RuntimeError(f'Unsupported activation: {act}') - - if hasattr(m, 'inplace'): - m.inplace = inplace - - return m -
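Finally, an editor's quick check of the prior-probability bias init in `utils.py` above: the returned bias makes a zero-weight classifier start with a sigmoid output equal to `prior_prob`.

```python
import math

prior_prob = 0.01
b = -math.log((1 - prior_prob) / prior_prob)   # what bias_init_with_prob returns
assert abs(1.0 / (1.0 + math.exp(-b)) - prior_prob) < 1e-12  # sigmoid(b) == 0.01
```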