# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import numpy as np
import pickle
import torch
import tqdm
from fvcore.common.file_io import PathManager

import slowfast.datasets.utils as data_utils
import slowfast.utils.checkpoint as cu
import slowfast.utils.distributed as du
import slowfast.utils.logging as logging
import slowfast.utils.misc as misc
import slowfast.visualization.tensorboard_vis as tb
from slowfast.datasets import loader
from slowfast.models import build_model
from slowfast.visualization.gradcam_utils import GradCAM
from slowfast.visualization.prediction_vis import WrongPredictionVis
from slowfast.visualization.utils import (
    GetWeightAndActivation,
    process_layer_index_data,
)
from slowfast.visualization.video_visualizer import VideoVisualizer

logger = logging.get_logger(__name__)


def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Run model visualization (weights, activations and model inputs) and plot
    them on Tensorboard.
    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    prefix = "module/" if n_devices > 1 else ""
    # Get a list of selected layer names and indexing.
    layer_ls, indexing_dict = process_layer_index_data(
        cfg.TENSORBOARD.MODEL_VIS.LAYER_LIST, layer_name_prefix=prefix
    )
    logger.info("Start Model Visualization.")
    # Register hooks for activations.
    model_vis = GetWeightAndActivation(model, layer_ls)

    if writer is not None and cfg.TENSORBOARD.MODEL_VIS.MODEL_WEIGHTS:
        layer_weights = model_vis.get_weights()
        writer.plot_weights_and_activations(
            layer_weights, tag="Layer Weights/", heat_map=False
        )

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )
    if n_devices > 1:
        grad_cam_layer_ls = [
            "module/" + layer
            for layer in cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST
        ]
    else:
        grad_cam_layer_ls = cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST

    if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
        gradcam = GradCAM(
            model,
            target_layers=grad_cam_layer_ls,
            data_mean=cfg.DATA.MEAN,
            data_std=cfg.DATA.STD,
            colormap=cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.COLORMAP,
        )
    logger.info("Finish drawing weights.")
    global_idx = -1
    for inputs, labels, _, meta in tqdm.tqdm(vis_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
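            # `inputs` is a list of tensors for multi-pathway models (e.g.
            # SlowFast: one slow and one fast pathway), or a single tensor
            # for single-pathway models.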
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            activations, preds = model_vis.get_activations(
                inputs, meta["boxes"]
            )
        else:
            activations, preds = model_vis.get_activations(inputs)
        if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.USE_TRUE_LABEL:
                inputs, preds = gradcam(inputs, labels=labels)
            else:
                inputs, preds = gradcam(inputs)
        if cfg.NUM_GPUS:
            inputs = du.all_gather_unaligned(inputs)
            activations = du.all_gather_unaligned(activations)
            preds = du.all_gather_unaligned(preds)
            if isinstance(inputs[0], list):
                for i in range(len(inputs)):
                    for j in range(len(inputs[0])):
                        inputs[i][j] = inputs[i][j].cpu()
            else:
                inputs = [inp.cpu() for inp in inputs]
            preds = [pred.cpu() for pred in preds]
        else:
            inputs, activations, preds = [inputs], [activations], [preds]

        boxes = [None] * max(n_devices, 1)
        if cfg.DETECTION.ENABLE and cfg.NUM_GPUS:
            boxes = du.all_gather_unaligned(meta["boxes"])
            boxes = [box.cpu() for box in boxes]

        if writer is not None:
            total_vids = 0
            for i in range(max(n_devices, 1)):
                cur_input = inputs[i]
                cur_activations = activations[i]
                cur_batch_size = cur_input[0].shape[0]
                cur_preds = preds[i]
                cur_boxes = boxes[i]
                for cur_batch_idx in range(cur_batch_size):
                    global_idx += 1
                    total_vids += 1
                    if (
                        cfg.TENSORBOARD.MODEL_VIS.INPUT_VIDEO
                        or cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE
                    ):
                        for path_idx, input_pathway in enumerate(cur_input):
                            if cfg.TEST.DATASET == "ava" and cfg.AVA.BGR:
                                video = input_pathway[
                                    cur_batch_idx, [2, 1, 0], ...
                                ]
                            else:
                                video = input_pathway[cur_batch_idx]

                            if not cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                                # Permute to (T, H, W, C) from (C, T, H, W).
                                video = video.permute(1, 2, 3, 0)
                                video = data_utils.revert_tensor_normalize(
                                    video, cfg.DATA.MEAN, cfg.DATA.STD
                                )
                            else:
                                # Permute from (T, C, H, W) to (T, H, W, C).
                                video = video.permute(0, 2, 3, 1)
                            bboxes = (
                                None if cur_boxes is None else cur_boxes[:, 1:]
                            )
                            cur_prediction = (
                                cur_preds
                                if cfg.DETECTION.ENABLE
                                else cur_preds[cur_batch_idx]
                            )
                            video = video_vis.draw_clip(
                                video, cur_prediction, bboxes=bboxes
                            )
                            video = (
                                torch.from_numpy(np.array(video))
                                .permute(0, 3, 1, 2)
                                .unsqueeze(0)
                            )
                            writer.add_video(
                                video,
                                tag="Input {}/Pathway {}".format(
                                    global_idx, path_idx + 1
                                ),
                            )
                    if cfg.TENSORBOARD.MODEL_VIS.ACTIVATIONS:
                        writer.plot_weights_and_activations(
                            cur_activations,
                            tag="Input {}/Activations: ".format(global_idx),
                            batch_idx=cur_batch_idx,
                            indexing_dict=indexing_dict,
                        )


def perform_wrong_prediction_vis(vis_loader, model, cfg):
    """
    Visualize video inputs with wrong predictions on Tensorboard.
    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    wrong_prediction_visualizer = WrongPredictionVis(cfg=cfg)
    for batch_idx, (inputs, labels, _, _) in tqdm.tqdm(enumerate(vis_loader)):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()

        # Some models modify the original input.
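        # Keep an untouched copy of the frames so the wrong-prediction
        # visualizer can display them after the forward pass.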
        inputs_clone = [inp.clone() for inp in inputs]
        preds = model(inputs)

        if cfg.NUM_GPUS > 1:
            preds, labels = du.all_gather([preds, labels])
            if isinstance(inputs_clone, (list,)):
                inputs_clone = du.all_gather(inputs_clone)
            else:
                inputs_clone = du.all_gather([inputs_clone])[0]

        if cfg.NUM_GPUS:
            # Transfer the data to the current CPU device.
            labels = labels.cpu()
            preds = preds.cpu()
            if isinstance(inputs_clone, (list,)):
                for i in range(len(inputs_clone)):
                    inputs_clone[i] = inputs_clone[i].cpu()
            else:
                inputs_clone = inputs_clone.cpu()

        # If using CPU (NUM_GPUS = 0), 1 represents 1 CPU.
        n_devices = max(cfg.NUM_GPUS, 1)
        for device_idx in range(1, n_devices + 1):
            wrong_prediction_visualizer.visualize_vid(
                video_input=inputs_clone,
                labels=labels,
                preds=preds.detach().clone(),
                batch_idx=device_idx * batch_idx,
            )

    logger.info(
        "Class indices with wrong predictions: {}".format(
            sorted(wrong_prediction_visualizer.wrong_class_prediction)
        )
    )
    wrong_prediction_visualizer.clean()


def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and (
        cfg.TENSORBOARD.MODEL_VIS.ENABLE
        or cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE
    ):
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        model.eval()
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, use_train_input=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None
        if cfg.TENSORBOARD.PREDICTIONS_PATH != "":
            assert not cfg.DETECTION.ENABLE, "Detection is not supported."
            logger.info(
                "Visualizing class-level performance from saved results..."
            )
            if writer is not None:
                with PathManager.open(
                    cfg.TENSORBOARD.PREDICTIONS_PATH, "rb"
                ) as f:
                    preds, labels = pickle.load(f, encoding="latin1")
                writer.plot_eval(preds, labels)
        if cfg.TENSORBOARD.MODEL_VIS.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                assert (
                    not cfg.DETECTION.ENABLE
                ), "Detection task is currently not supported for Grad-CAM visualization."
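                # Grad-CAM needs exactly one target layer per pathway: one
                # layer for single-pathway architectures, two for two-pathway
                # architectures such as SlowFast.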
                if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 1
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST)
                    )
                elif cfg.MODEL.ARCH in cfg.MODEL.MULTI_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 2
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST)
                    )
                else:
                    raise NotImplementedError(
                        "Model arch {} is not in {}".format(
                            cfg.MODEL.ARCH,
                            cfg.MODEL.SINGLE_PATHWAY_ARCH
                            + cfg.MODEL.MULTI_PATHWAY_ARCH,
                        )
                    )
            logger.info(
                "Visualize model analysis for {} iterations".format(
                    len(vis_loader)
                )
            )
            # Run visualization on the model.
            run_visualization(vis_loader, model, cfg, writer)
        if cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE:
            logger.info(
                "Visualize Wrong Predictions for {} iterations".format(
                    len(vis_loader)
                )
            )
            perform_wrong_prediction_vis(vis_loader, model, cfg)

        if writer is not None:
            writer.close()
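

# A minimal sketch of how `visualize` might be driven stand-alone; in this
# repo the usual entry point is tools/run_net.py. `parse_args` and
# `load_config` below are assumed to behave as in slowfast.utils.parser.
if __name__ == "__main__":
    from slowfast.utils.parser import load_config, parse_args

    args = parse_args()
    cfg = load_config(args)
    # Enable Tensorboard model visualization for this run.
    cfg.TENSORBOARD.ENABLE = True
    cfg.TENSORBOARD.MODEL_VIS.ENABLE = True
    visualize(cfg)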