File size: 9,289 Bytes
814a594 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
# --------------------------------------------------------
# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Modified by Xueyan Zou ([email protected])
# --------------------------------------------------------
import logging
import time
import datetime
import json
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from typing import Tuple, Dict, List, Union
from infinibatch import iterators
from trainer.default_trainer import DefaultTrainer
from detectron2.evaluation import inference_on_dataset
from detectron2.utils.logger import log_every_n_seconds
from detectron2.data import MetadataCatalog
from modeling import build_model
from modeling.utils import get_class_names
from modeling.BaseModel import BaseModel
from datasets import build_evaluator, build_eval_dataloader, build_train_dataloader
from utilities.distributed import is_main_process
from utilities.constants import COCO_PANOPTIC_CLASSES
from trainer.utils.misc import move_batch_to_device, cast_batch_to_half
from .utils.misc import hook_metadata, hook_switcher, hook_opt
logger = logging.getLogger(__name__)
class XDecoderPipeline:
    """Training/evaluation pipeline for X-Decoder.

    Wires together model construction, train/eval dataloader creation, the
    per-step forward/backward logic, and multi-dataset evaluation, as driven
    by a ``DefaultTrainer`` instance.
    """

    def __init__(self, opt):
        """Store the global options dict.

        Args:
            opt: configuration dict; expects keys such as 'RESUME_FROM',
                'device', 'world_size', 'FP16', 'DATASETS', 'SAVE_DIR', ...
        """
        self._opt = opt
        # Use the module logger instead of a leftover debug print.
        logger.info("RESUME_FROM: %s", self._opt['RESUME_FROM'])

    def initialize_model(self):
        """Build the raw model, set it to train mode, and wrap it in BaseModel.

        Returns:
            dict mapping the model name ('default') to a BaseModel instance.
        """
        model_name = "default"
        model = build_model(self._opt)
        model.train()
        if is_main_process():
            # Only rank 0 logs the (potentially very long) model summary.
            logger.info(model)
        raw_models = {model_name: BaseModel(self._opt, model)}
        return raw_models

    def get_dataloaders(
        self, trainer: DefaultTrainer,
        dataset_label: str,
        is_evaluation: bool
    ) -> Union[DataLoader, iterators.CheckpointableIterator]:
        """Return the dataloader for ``dataset_label``, building lazily once.

        On the evaluation path this also (re)builds ``self.evaluator`` for the
        selected TEST dataset. On the training path it records the number of
        optimizer updates per epoch into the LR-scheduler params.

        Args:
            trainer: the driving trainer (unused here; kept for interface).
            dataset_label: 'dev' or a name listed in DATASETS.TEST / TRAIN.
            is_evaluation: True to get an eval loader, False for training.
        """
        if is_evaluation:
            # Build all evaluation dataloaders once and cache them.
            if not hasattr(self, 'valid_loader'):
                dataloaders = build_eval_dataloader(self._opt)
                self.valid_loader = dataloaders
            else:
                dataloaders = self.valid_loader
            # 'dev' is shorthand for the first TEST dataset.
            idx = 0 if dataset_label == 'dev' else self._opt['DATASETS']['TEST'].index(dataset_label)
            dataloader = dataloaders[idx]
            self.evaluator = build_evaluator(self._opt, self._opt['DATASETS']['TEST'][idx], self._opt['SAVE_DIR'])
        else:
            if not hasattr(self, 'train_loader'):
                dataloader = build_train_dataloader(self._opt)
                self.train_loader = dataloader
                logger.info(f'num of train samples: {len(dataloader)}')
            else:
                dataloader = self.train_loader

            # temp solution for lr scheduler: derive the number of optimizer
            # updates per epoch from dataset length and accumulation steps.
            steps_total = len(self.train_loader)
            steps_acc = self._opt['GRADIENT_ACCUMULATE_STEP']
            steps_update = steps_total // steps_acc
            self._opt["LR_SCHEDULER_PARAMS"]["steps_update_per_epoch"] = steps_update
        return dataloader

    @staticmethod
    def forward_func(trainer, batch):
        """Run the default model on one batch and return its loss dict."""
        loss = trainer.models['default'](batch)
        return loss

    def forward_step(
        self,
        trainer: DefaultTrainer,
        batch,
        grad_acc_batches: List,
        grad_acc_index: int,
        is_distributed: bool,
    ) -> Tuple[Dict[str, float], Dict[str, int], Dict]:
        """Execute one training step: forward, backward, optimizer update.

        Returns:
            (loss_info, sample_size_info, extra_info): per-term detached loss
            values, sample counts, and an (empty) extras dict.
        """
        loss_info, sample_size_info, extra_info = {}, {}, {}
        batch = move_batch_to_device(batch, self._opt['device'])
        if self._opt['FP16']:
            # in FP16 mode, DeepSpeed casts the model to FP16, so the input
            # needs to be manually casted to FP16
            batch = cast_batch_to_half(batch)
        loss = trainer.compute_loss(self.forward_func, batch)
        loss_info = {k: v.detach().item() for k, v in loss.items()}
        sample_size_info = {'num_samples': len(batch)}
        # Sum the individual loss terms; use a fresh name rather than
        # shadowing the loss dict inside its own reduction.
        total_loss = sum(loss.values())
        trainer.backward_loss(total_loss, model_names=['default'])
        trainer.update_model(model_name='default')
        return loss_info, sample_size_info, extra_info

    def evaluate_model(
        self,
        trainer: DefaultTrainer,
        save_folder,
    ) -> Tuple[Dict, Dict[str, float], bool]:
        """Evaluate the default model on every dataset in DATASETS.TEST.

        For each dataset: swap in the dataset's metadata/class names, run
        inference with timing stats, collect evaluator results, then restore
        the training-time metadata. On the main process the full results are
        also dumped to ``<save_folder>/<ckpt_name>_eval_results.json``.

        Returns:
            dict keyed by '<dataset>/<eval_type>'; entries holding an
            'instance_results' sub-dict are collapsed to their 'scores'.
        """
        model = trainer.raw_models['default'].eval()
        self._opt = hook_opt(self._opt)
        dataset_names = self._opt['DATASETS']['TEST']
        scores = {}
        summary = {}

        for dataset_label in dataset_names:
            torch.cuda.empty_cache()
            eval_batch_gen = self.get_dataloaders(trainer, dataset_label, is_evaluation=True)
            self.evaluator.reset()
            with torch.no_grad():
                # Point the model at this dataset's vocabulary and metadata.
                names = get_class_names(dataset_label)
                if self._opt['MODEL']['ENCODER']['BINARY_CLASSES']:
                    names = ['target', 'background']
                model.model.metadata = MetadataCatalog.get(dataset_label)
                model.model.metadata = hook_metadata(model.model.metadata, dataset_label)
                eval_type = model.model.metadata.evaluator_type
                if 'background' in names:
                    # 'background' is not predicted as a class of its own.
                    model.model.sem_seg_head.num_classes = len(names) - 1
                model.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(names, is_eval=True)
                hook_switcher(model, dataset_label)

                # Timing bookkeeping (warmup iterations are excluded).
                total = len(eval_batch_gen)
                num_warmup = min(5, total - 1)
                start_time = time.perf_counter()
                total_data_time = 0
                total_compute_time = 0
                total_eval_time = 0
                start_data_time = time.perf_counter()

                for idx, batch in enumerate(eval_batch_gen):
                    total_data_time += time.perf_counter() - start_data_time
                    if idx == num_warmup:
                        # Reset timers once warmup is done.
                        start_time = time.perf_counter()
                        total_data_time = 0
                        total_compute_time = 0
                        total_eval_time = 0
                    start_compute_time = time.perf_counter()
                    batch = move_batch_to_device(batch, self._opt['device'])
                    if self._opt['FP16']:
                        # in FP16 mode, DeepSpeed casts the model to FP16, so
                        # the input needs to be manually casted to FP16
                        batch = cast_batch_to_half(batch)
                    outputs = model(batch, mode=eval_type)
                    if torch.cuda.is_available():
                        # Ensure GPU work is finished before reading the clock.
                        torch.cuda.synchronize()
                    total_compute_time += time.perf_counter() - start_compute_time

                    start_eval_time = time.perf_counter()
                    self.evaluator.process(batch, outputs)
                    total_eval_time += time.perf_counter() - start_eval_time

                    iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
                    data_seconds_per_iter = total_data_time / iters_after_start
                    compute_seconds_per_iter = total_compute_time / iters_after_start
                    eval_seconds_per_iter = total_eval_time / iters_after_start
                    total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start
                    if is_main_process() and (idx >= num_warmup * 2 or compute_seconds_per_iter > 5):
                        eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1)))
                        log_every_n_seconds(
                            logging.INFO,
                            (
                                f"Task {dataset_label}. "
                                f"Inference done {idx + 1}/{total}. "
                                f"Dataloading: {data_seconds_per_iter:.4f} s/iter. "
                                f"Inference: {compute_seconds_per_iter:.4f} s/iter. "
                                f"Eval: {eval_seconds_per_iter:.4f} s/iter. "
                                f"Total: {total_seconds_per_iter:.4f} s/iter. "
                                f"ETA={eta}"
                            ),
                            n=5,
                        )
                    start_data_time = time.perf_counter()

            results = self.evaluator.evaluate()
            # Drop the dataset-specific text embeddings before the next task.
            model.model.sem_seg_head.predictor.lang_encoder.reset_text_embeddings()
            if is_main_process():
                scores["{}/{}".format(dataset_label, eval_type)] = results

        # set back to training stat.
        model.model.sem_seg_head.num_classes = self._opt['MODEL']['ENCODER']['NUM_CLASSES']
        model.model.metadata = MetadataCatalog.get(self._opt['DATASETS']['TRAIN'][0])

        # save scores (main process only)
        if is_main_process():
            # Name the JSON after the resumed checkpoint file stem.
            model_name = self._opt['RESUME_FROM'].split('/')[-1].split('.')[0]
            with open(os.path.join(save_folder, f'{model_name}_eval_results.json'), 'w') as f:
                json.dump(scores, f, indent=4)

        # todo
        # hack to return only results/scores
        for datatype in scores:
            for evaltype in scores[datatype]:
                if 'instance_results' in scores[datatype][evaltype]:
                    scores[datatype][evaltype] = scores[datatype][evaltype]['scores']
        return scores