|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import logging |
|
import time |
|
import datetime |
|
import json |
|
import os |
|
|
|
import torch |
|
import torch.nn as nn |
|
from torch.utils.data import DataLoader |
|
from typing import Tuple, Dict, List, Union |
|
from infinibatch import iterators |
|
|
|
from trainer.default_trainer import DefaultTrainer |
|
|
|
from detectron2.evaluation import inference_on_dataset |
|
from detectron2.utils.logger import log_every_n_seconds |
|
from detectron2.data import MetadataCatalog |
|
|
|
from modeling import build_model |
|
from modeling.utils import get_class_names |
|
from modeling.BaseModel import BaseModel |
|
from datasets import build_evaluator, build_eval_dataloader, build_train_dataloader |
|
from utilities.distributed import is_main_process |
|
from utilities.constants import COCO_PANOPTIC_CLASSES |
|
from trainer.utils.misc import move_batch_to_device, cast_batch_to_half |
|
|
|
from .utils.misc import hook_metadata, hook_switcher, hook_opt |
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
class XDecoderPipeline:
    """Training/evaluation pipeline for the X-Decoder model.

    Wires model construction, dataloader caching, the per-step loss
    computation and the multi-dataset evaluation loop into the hooks
    expected by ``DefaultTrainer``.
    """

    def __init__(self, opt):
        # opt: configuration dict (keys used here include 'RESUME_FROM',
        # 'DATASETS', 'FP16', 'device', 'SAVE_DIR', ...).
        self._opt = opt
        # Surface the checkpoint being resumed from (was a stray debug print).
        logger.info("RESUME_FROM: %s", self._opt['RESUME_FROM'])

    def initialize_model(self):
        """Build the model in train mode and wrap it in ``BaseModel``.

        Returns:
            dict mapping the model name ('default') to the wrapped model.
        """
        model_name = "default"
        model = build_model(self._opt)
        model.train()

        if is_main_process():
            logger.info(model)

        raw_models = {model_name: BaseModel(self._opt, model)}
        return raw_models

    def get_dataloaders(
        self, trainer: DefaultTrainer,
        dataset_label: str,
        is_evaluation: bool
    ) -> Union[DataLoader, iterators.CheckpointableIterator]:
        """Return (and lazily cache) the train or eval dataloader.

        Side effects: caches loaders on ``self``; for evaluation, builds
        ``self.evaluator`` for the selected TEST dataset; for training,
        records the number of optimizer updates per epoch into the LR
        scheduler params.
        """
        if is_evaluation:
            if not hasattr(self, 'valid_loader'):
                dataloaders = build_eval_dataloader(self._opt)
                self.valid_loader = dataloaders
            else:
                dataloaders = self.valid_loader
            # 'dev' maps to the first TEST dataset; otherwise look up by name.
            idx = 0 if dataset_label=='dev' else self._opt['DATASETS']['TEST'].index(dataset_label)
            dataloader = dataloaders[idx]
            self.evaluator = build_evaluator(self._opt, self._opt['DATASETS']['TEST'][idx], self._opt['SAVE_DIR'])
        else:
            if not hasattr(self, 'train_loader'):
                dataloader = build_train_dataloader(self._opt)
                self.train_loader = dataloader
                logger.info(f'num of train samples: {len(dataloader)}')
            else:
                dataloader = self.train_loader

            # Tell the LR scheduler how many optimizer updates one epoch
            # holds: total batches divided by the gradient-accumulation factor.
            steps_total = len(self.train_loader)
            steps_acc = self._opt['GRADIENT_ACCUMULATE_STEP']
            steps_update = steps_total // steps_acc
            self._opt["LR_SCHEDULER_PARAMS"]["steps_update_per_epoch"] = steps_update
        return dataloader

    @staticmethod
    def forward_func(trainer, batch):
        # Loss closure handed to trainer.compute_loss so the trainer can
        # wrap the forward pass (e.g. with autocast) as it sees fit.
        loss = trainer.models['default'](batch)
        return loss

    def forward_step(
        self,
        trainer: DefaultTrainer,
        batch,
        grad_acc_batches: List,
        grad_acc_index: int,
        is_distributed: bool,
    ) -> Tuple[Dict[str, float], Dict[str, int], Dict]:
        """Run one training step: forward, backward and parameter update.

        Returns:
            (loss_info, sample_size_info, extra_info): per-term scalar
            losses, sample counts, and an (empty) extra-info dict.
        """
        loss_info, sample_size_info, extra_info = {}, {}, {}
        batch = move_batch_to_device(batch, self._opt['device'])
        if self._opt['FP16']:
            # In FP16 mode, cast the data to half precision to match the model.
            batch = cast_batch_to_half(batch)
        loss_dict = trainer.compute_loss(self.forward_func, batch)
        loss_info = {k: v.detach().item() for k, v in loss_dict.items()}
        sample_size_info = {'num_samples': len(batch)}
        # Sum the individual loss terms into the scalar used for backprop.
        # (Was `sum(loss for loss in loss.values())`, which shadowed `loss`.)
        total_loss = sum(loss_dict.values())
        trainer.backward_loss(total_loss, model_names=['default'])
        trainer.update_model(model_name='default')
        return loss_info, sample_size_info, extra_info

    def evaluate_model(
        self,
        trainer: DefaultTrainer,
        save_folder,
    ) -> Dict:
        """Evaluate the model on every dataset in DATASETS.TEST.

        For each dataset: swaps per-dataset metadata and text embeddings
        into the model, runs inference with periodic timing logs, and
        collects evaluator results. On the main process, dumps the raw
        results to ``{save_folder}/{checkpoint}_eval_results.json`` and
        returns a ``{dataset/eval_type: scores}`` dict (instance results
        flattened to their 'scores' entry); other ranks return ``{}``.
        """
        model = trainer.raw_models['default'].eval()
        self._opt = hook_opt(self._opt)
        dataset_names = self._opt['DATASETS']['TEST']
        scores = {}

        for dataset_label in dataset_names:
            torch.cuda.empty_cache()
            eval_batch_gen = self.get_dataloaders(trainer, dataset_label, is_evaluation=True)
            self.evaluator.reset()
            with torch.no_grad():
                names = get_class_names(dataset_label)
                if self._opt['MODEL']['ENCODER']['BINARY_CLASSES']:
                    names = ['target', 'background']
                model.model.metadata = MetadataCatalog.get(dataset_label)
                model.model.metadata = hook_metadata(model.model.metadata, dataset_label)
                eval_type = model.model.metadata.evaluator_type
                if 'background' in names:
                    # Background is not a predicted class for the seg head.
                    model.model.sem_seg_head.num_classes = len(names) - 1
                model.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(names, is_eval=True)
                hook_switcher(model, dataset_label)
                total = len(eval_batch_gen)
                num_warmup = min(5, total - 1)
                start_time = time.perf_counter()
                total_data_time = 0
                total_compute_time = 0
                total_eval_time = 0
                start_data_time = time.perf_counter()

                for idx, batch in enumerate(eval_batch_gen):
                    total_data_time += time.perf_counter() - start_data_time
                    if idx == num_warmup:
                        # Restart the clocks after warmup so the reported
                        # throughput excludes startup overhead.
                        start_time = time.perf_counter()
                        total_data_time = 0
                        total_compute_time = 0
                        total_eval_time = 0

                    start_compute_time = time.perf_counter()
                    batch = move_batch_to_device(batch, self._opt['device'])
                    if self._opt['FP16']:
                        # In FP16 mode, cast the data to half precision.
                        batch = cast_batch_to_half(batch)

                    outputs = model(batch, mode=eval_type)
                    if torch.cuda.is_available():
                        # Wait for async CUDA kernels so timings are accurate.
                        torch.cuda.synchronize()

                    total_compute_time += time.perf_counter() - start_compute_time
                    start_eval_time = time.perf_counter()

                    self.evaluator.process(batch, outputs)
                    total_eval_time += time.perf_counter() - start_eval_time

                    # Iterations counted since the post-warmup clock reset.
                    iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
                    data_seconds_per_iter = total_data_time / iters_after_start
                    compute_seconds_per_iter = total_compute_time / iters_after_start
                    eval_seconds_per_iter = total_eval_time / iters_after_start
                    total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start

                    if is_main_process() and (idx >= num_warmup * 2 or compute_seconds_per_iter > 5):
                        eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1)))
                        log_every_n_seconds(
                            logging.INFO,
                            (
                                f"Task {dataset_label}. "
                                f"Inference done {idx + 1}/{total}. "
                                f"Dataloading: {data_seconds_per_iter:.4f} s/iter. "
                                f"Inference: {compute_seconds_per_iter:.4f} s/iter. "
                                f"Eval: {eval_seconds_per_iter:.4f} s/iter. "
                                f"Total: {total_seconds_per_iter:.4f} s/iter. "
                                f"ETA={eta}"
                            ),
                            n=5,
                        )
                    start_data_time = time.perf_counter()

            results = self.evaluator.evaluate()
            model.model.sem_seg_head.predictor.lang_encoder.reset_text_embeddings()

            if is_main_process():
                scores["{}/{}".format(dataset_label, eval_type)] = results

        # Restore the training-time class count and metadata before returning.
        model.model.sem_seg_head.num_classes = self._opt['MODEL']['ENCODER']['NUM_CLASSES']
        model.model.metadata = MetadataCatalog.get(self._opt['DATASETS']['TRAIN'][0])

        if is_main_process():
            model_name = self._opt['RESUME_FROM'].split('/')[-1].split('.')[0]
            with open(os.path.join(save_folder, f'{model_name}_eval_results.json'), 'w') as f:
                json.dump(scores, f, indent=4)

        # Keep only the scalar scores from instance-level results; `scores`
        # is empty on non-main ranks, so this is a no-op there.
        for datatype in scores:
            for evaltype in scores[datatype]:
                if 'instance_results' in scores[datatype][evaltype]:
                    scores[datatype][evaltype] = scores[datatype][evaltype]['scores']
        return scores