Spaces:
Sleeping
Sleeping
Add show label on gradcam
Browse files- app.py +29 -16
- requirements.txt +1 -0
- requirements_old.txt +0 -0
- src/Nets.py +3 -1
- src/custom_code/custom_grad_cam/__init__.py +0 -20
- src/custom_code/custom_grad_cam/ablation_cam.py +0 -148
- src/custom_code/custom_grad_cam/ablation_cam_multilayer.py +0 -136
- src/custom_code/custom_grad_cam/ablation_layer.py +0 -155
- src/custom_code/custom_grad_cam/activations_and_gradients.py +0 -46
- src/custom_code/custom_grad_cam/base_cam.py +0 -205
- src/custom_code/custom_grad_cam/eigen_cam.py +0 -23
- src/custom_code/custom_grad_cam/eigen_grad_cam.py +0 -21
- src/custom_code/custom_grad_cam/feature_factorization/__init__.py +0 -0
- src/custom_code/custom_grad_cam/feature_factorization/deep_feature_factorization.py +0 -131
- src/custom_code/custom_grad_cam/feature_factorization/utils.py +0 -19
- src/custom_code/custom_grad_cam/fullgrad_cam.py +0 -95
- src/custom_code/custom_grad_cam/grad_cam.py +0 -22
- src/custom_code/custom_grad_cam/grad_cam_elementwise.py +0 -30
- src/custom_code/custom_grad_cam/grad_cam_plusplus.py +0 -32
- src/custom_code/custom_grad_cam/guided_backprop.py +0 -100
- src/custom_code/custom_grad_cam/hirescam.py +0 -32
- src/custom_code/custom_grad_cam/layer_cam.py +0 -36
- src/custom_code/custom_grad_cam/metrics/__init__.py +0 -0
- src/custom_code/custom_grad_cam/metrics/cam_mult_image.py +0 -37
- src/custom_code/custom_grad_cam/metrics/perturbation_confidence.py +0 -109
- src/custom_code/custom_grad_cam/metrics/road.py +0 -181
- src/custom_code/custom_grad_cam/random_cam.py +0 -22
- src/custom_code/custom_grad_cam/score_cam.py +0 -60
- src/custom_code/custom_grad_cam/sobel_cam.py +0 -11
- src/custom_code/custom_grad_cam/utils/__init__.py +0 -4
- src/custom_code/custom_grad_cam/utils/find_layers.py +0 -30
- src/custom_code/custom_grad_cam/utils/image.py +0 -183
- src/custom_code/custom_grad_cam/utils/model_targets.py +0 -103
- src/custom_code/custom_grad_cam/utils/reshape_transforms.py +0 -34
- src/custom_code/custom_grad_cam/utils/svd_on_activations.py +0 -19
- src/custom_code/custom_grad_cam/xgrad_cam.py +0 -31
- src/gradio_blocks.py +19 -11
- src/results/gradcam_video.mp4 +2 -2
- src/results/infer_image.png +2 -2
app.py
CHANGED
@@ -14,15 +14,14 @@ import mediapy
|
|
14 |
import numpy as np
|
15 |
import pandas as pd
|
16 |
import torch
|
17 |
-
|
18 |
from gradio_blocks import build_video_to_camvideo
|
19 |
from Nets import CustomResNet18
|
20 |
from PIL import Image, ImageDraw, ImageFont
|
21 |
|
22 |
-
|
23 |
-
from
|
24 |
-
from
|
25 |
-
from custom_code.custom_grad_cam.utils.image import show_cam_on_image
|
26 |
|
27 |
from tqdm import tqdm
|
28 |
from util import transform
|
@@ -36,9 +35,9 @@ IMAGE_PATH = os.path.join(os.getcwd(), 'src/examples')
|
|
36 |
IMAGES_PER_ROW = 5
|
37 |
|
38 |
MAXIMAL_FRAMES = 700
|
39 |
-
BATCHES_TO_PROCESS =
|
40 |
-
OUTPUT_FPS =
|
41 |
-
MAX_OUT_FRAMES =
|
42 |
|
43 |
MODEL = CustomResNet18(111).eval()
|
44 |
MODEL.load_state_dict(torch.load('src/results/models/best_model.pth', map_location=torch.device('cpu')))
|
@@ -51,8 +50,6 @@ LANGUAGES_TO_SELECT = {
|
|
51 |
"Italian": "it",
|
52 |
"Finnish": "fi",
|
53 |
"Ukrainian": "uk",
|
54 |
-
"Japanese": "ja",
|
55 |
-
"Hebrew": "iw"
|
56 |
}
|
57 |
|
58 |
CAM_METHODS = {
|
@@ -106,8 +103,12 @@ def get_translated(to_translate, target_language="German"):
|
|
106 |
target_language = LANGUAGES_TO_SELECT[target_language] if target_language in LANGUAGES_TO_SELECT else target_language
|
107 |
if target_language == "en": return to_translate
|
108 |
if target_language not in LANGUAGES_TO_SELECT.values(): raise gr.Error(f'Language {target_language} not found.')
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
111 |
with ThreadPoolExecutor(max_workers=30) as executor:
|
112 |
# give the executor the list of images and args (in this case, the target language)
|
113 |
# and let the executor map the function to the list of images
|
@@ -156,8 +157,8 @@ def gradcam(image, colormap="Jet", use_eigen_smooth=False, use_aug_smooth=False,
|
|
156 |
|
157 |
with CAM_METHODS[cam_method](model=MODEL, target_layers=layers) as cam:
|
158 |
grayscale_cam = cam(input_tensor=image_tensor, targets=targets, aug_smooth=use_aug_smooth, eigen_smooth=use_eigen_smooth)
|
159 |
-
|
160 |
-
|
161 |
|
162 |
grayscale_cam = grayscale_cam[0, :]
|
163 |
grayscale_cam = cv2.resize(grayscale_cam, (image_width, image_height), interpolation=cv2.INTER_CUBIC)
|
@@ -185,7 +186,7 @@ def gradcam(image, colormap="Jet", use_eigen_smooth=False, use_aug_smooth=False,
|
|
185 |
out_image = Image.fromarray(visualization)
|
186 |
return out_image
|
187 |
|
188 |
-
def gradcam_video(video, colormap="Jet", use_eigen_smooth=False, BWHighlight=False, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class"):
|
189 |
global OUTPUT_FPS, MAXIMAL_FRAMES, BATCHES_TO_PROCESS, MAX_OUT_FRAMES
|
190 |
if video is None: raise gr.Error("Please upload a video.")
|
191 |
if colormap not in CV2_COLORMAPS.keys():
|
@@ -241,6 +242,18 @@ def gradcam_video(video, colormap="Jet", use_eigen_smooth=False, BWHighlight=Fal
|
|
241 |
else:
|
242 |
image = image / 255
|
243 |
visualization = show_cam_on_image(image, _grayscale_cam, use_rgb=True, image_weight=alpha, colormap=colormap)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
results.append(visualization)
|
245 |
|
246 |
# save video
|
@@ -479,7 +492,7 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
|
|
479 |
# Video CAM
|
480 |
# -------------------------------------------
|
481 |
with gr.Tab("Explain Video"):
|
482 |
-
build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gradcam_video)
|
483 |
|
484 |
# -------------------------------------------
|
485 |
# EXAMPLES
|
|
|
14 |
import numpy as np
|
15 |
import pandas as pd
|
16 |
import torch
|
17 |
+
import deep_translator
|
18 |
from gradio_blocks import build_video_to_camvideo
|
19 |
from Nets import CustomResNet18
|
20 |
from PIL import Image, ImageDraw, ImageFont
|
21 |
|
22 |
+
from pytorch_grad_cam import GradCAM, HiResCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
|
23 |
+
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
|
24 |
+
from pytorch_grad_cam.utils.image import show_cam_on_image
|
|
|
25 |
|
26 |
from tqdm import tqdm
|
27 |
from util import transform
|
|
|
35 |
IMAGES_PER_ROW = 5
|
36 |
|
37 |
MAXIMAL_FRAMES = 700
|
38 |
+
BATCHES_TO_PROCESS = 15
|
39 |
+
OUTPUT_FPS = 15
|
40 |
+
MAX_OUT_FRAMES = 90
|
41 |
|
42 |
MODEL = CustomResNet18(111).eval()
|
43 |
MODEL.load_state_dict(torch.load('src/results/models/best_model.pth', map_location=torch.device('cpu')))
|
|
|
50 |
"Italian": "it",
|
51 |
"Finnish": "fi",
|
52 |
"Ukrainian": "uk",
|
|
|
|
|
53 |
}
|
54 |
|
55 |
CAM_METHODS = {
|
|
|
103 |
target_language = LANGUAGES_TO_SELECT[target_language] if target_language in LANGUAGES_TO_SELECT else target_language
|
104 |
if target_language == "en": return to_translate
|
105 |
if target_language not in LANGUAGES_TO_SELECT.values(): raise gr.Error(f'Language {target_language} not found.')
|
106 |
+
try:
|
107 |
+
return deep_translator.GoogleTranslator(source="en", target=target_language).translate(to_translate)
|
108 |
+
except deep_translator.exceptions.TooManyRequests:
|
109 |
+
print(f'Too many requests for {to_translate} to {target_language}.')
|
110 |
+
return ("-/-")
|
111 |
+
|
112 |
with ThreadPoolExecutor(max_workers=30) as executor:
|
113 |
# give the executor the list of images and args (in this case, the target language)
|
114 |
# and let the executor map the function to the list of images
|
|
|
157 |
|
158 |
with CAM_METHODS[cam_method](model=MODEL, target_layers=layers) as cam:
|
159 |
grayscale_cam = cam(input_tensor=image_tensor, targets=targets, aug_smooth=use_aug_smooth, eigen_smooth=use_eigen_smooth)
|
160 |
+
if label_image:
|
161 |
+
predicted_animal = get_class_name(np.argmax(MODEL.output.cpu().data.numpy(), axis=-1)[0])
|
162 |
|
163 |
grayscale_cam = grayscale_cam[0, :]
|
164 |
grayscale_cam = cv2.resize(grayscale_cam, (image_width, image_height), interpolation=cv2.INTER_CUBIC)
|
|
|
186 |
out_image = Image.fromarray(visualization)
|
187 |
return out_image
|
188 |
|
189 |
+
def gradcam_video(video, colormap="Jet", use_eigen_smooth=False, BWHighlight=False, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class", label_image=True, target_lang="German"):
|
190 |
global OUTPUT_FPS, MAXIMAL_FRAMES, BATCHES_TO_PROCESS, MAX_OUT_FRAMES
|
191 |
if video is None: raise gr.Error("Please upload a video.")
|
192 |
if colormap not in CV2_COLORMAPS.keys():
|
|
|
242 |
else:
|
243 |
image = image / 255
|
244 |
visualization = show_cam_on_image(image, _grayscale_cam, use_rgb=True, image_weight=alpha, colormap=colormap)
|
245 |
+
|
246 |
+
if label_image:
|
247 |
+
pass
|
248 |
+
predicted_animal = get_class_name(np.argmax(MODEL.output.cpu().data.numpy(), axis=-1)[i])
|
249 |
+
plt_image = Image.fromarray(visualization, mode="RGB")
|
250 |
+
draw = ImageDraw.Draw(plt_image)
|
251 |
+
draw.rectangle((5, 5, 150, 30), fill=(10, 10, 10, 100))
|
252 |
+
animal = predicted_animal.capitalize()
|
253 |
+
if target_lang is not None and target_lang != "None":
|
254 |
+
animal += f' ({get_translated(animal, target_lang)})'
|
255 |
+
draw.text((10, 7), animal, font=font, fill=(255, 125, 0, 255))
|
256 |
+
visualization = np.array(plt_image)
|
257 |
results.append(visualization)
|
258 |
|
259 |
# save video
|
|
|
492 |
# Video CAM
|
493 |
# -------------------------------------------
|
494 |
with gr.Tab("Explain Video"):
|
495 |
+
build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gradcam_video, animal_translation_target_language)
|
496 |
|
497 |
# -------------------------------------------
|
498 |
# EXAMPLES
|
requirements.txt
CHANGED
@@ -49,6 +49,7 @@ ipykernel==6.25.2
|
|
49 |
ipython==8.16.0
|
50 |
jedi==0.19.0
|
51 |
Jinja2==3.1.2
|
|
|
52 |
joblib==1.3.2
|
53 |
jsonschema==4.19.1
|
54 |
jsonschema-specifications==2023.7.1
|
|
|
49 |
ipython==8.16.0
|
50 |
jedi==0.19.0
|
51 |
Jinja2==3.1.2
|
52 |
+
grad-cam==1.4.8
|
53 |
joblib==1.3.2
|
54 |
jsonschema==4.19.1
|
55 |
jsonschema-specifications==2023.7.1
|
requirements_old.txt
DELETED
Binary file (4.01 kB)
|
|
src/Nets.py
CHANGED
@@ -4,9 +4,11 @@ from torchvision import models
|
|
4 |
class CustomResNet18(nn.Module):
|
5 |
def __init__(self, num_classes=11):
|
6 |
super(CustomResNet18, self).__init__()
|
|
|
7 |
self.resnet = models.resnet18(pretrained=True)
|
8 |
num_features = self.resnet.fc.in_features
|
9 |
self.resnet.fc = nn.Linear(num_features, num_classes)
|
10 |
|
11 |
def forward(self, x):
|
12 |
-
|
|
|
|
4 |
class CustomResNet18(nn.Module):
|
5 |
def __init__(self, num_classes=11):
|
6 |
super(CustomResNet18, self).__init__()
|
7 |
+
self.output = None
|
8 |
self.resnet = models.resnet18(pretrained=True)
|
9 |
num_features = self.resnet.fc.in_features
|
10 |
self.resnet.fc = nn.Linear(num_features, num_classes)
|
11 |
|
12 |
def forward(self, x):
|
13 |
+
self.output = self.resnet(x)
|
14 |
+
return self.output
|
src/custom_code/custom_grad_cam/__init__.py
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
from custom_grad_cam.grad_cam import GradCAM
|
2 |
-
from custom_grad_cam.hirescam import HiResCAM
|
3 |
-
from custom_grad_cam.grad_cam_elementwise import GradCAMElementWise
|
4 |
-
from custom_grad_cam.ablation_layer import AblationLayer, AblationLayerVit, AblationLayerFasterRCNN
|
5 |
-
from custom_grad_cam.ablation_cam import AblationCAM
|
6 |
-
from custom_grad_cam.xgrad_cam import XGradCAM
|
7 |
-
from custom_grad_cam.grad_cam_plusplus import GradCAMPlusPlus
|
8 |
-
from custom_grad_cam.score_cam import ScoreCAM
|
9 |
-
from custom_grad_cam.layer_cam import LayerCAM
|
10 |
-
from custom_grad_cam.eigen_cam import EigenCAM
|
11 |
-
from custom_grad_cam.eigen_grad_cam import EigenGradCAM
|
12 |
-
from custom_grad_cam.random_cam import RandomCAM
|
13 |
-
from custom_grad_cam.fullgrad_cam import FullGrad
|
14 |
-
from custom_grad_cam.guided_backprop import GuidedBackpropReLUModel
|
15 |
-
from custom_grad_cam.activations_and_gradients import ActivationsAndGradients
|
16 |
-
from custom_grad_cam.feature_factorization.deep_feature_factorization import DeepFeatureFactorization, run_dff_on_image
|
17 |
-
import custom_grad_cam.utils.model_targets
|
18 |
-
import custom_grad_cam.utils.reshape_transforms
|
19 |
-
import custom_grad_cam.metrics.cam_mult_image
|
20 |
-
import custom_grad_cam.metrics.road
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/ablation_cam.py
DELETED
@@ -1,148 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import torch
|
3 |
-
import tqdm
|
4 |
-
from typing import Callable, List
|
5 |
-
from custom_grad_cam.base_cam import BaseCAM
|
6 |
-
from custom_grad_cam.utils.find_layers import replace_layer_recursive
|
7 |
-
from custom_grad_cam.ablation_layer import AblationLayer
|
8 |
-
|
9 |
-
|
10 |
-
""" Implementation of AblationCAM
|
11 |
-
https://openaccess.thecvf.com/content_WACV_2020/papers/Desai_Ablation-CAM_Visual_Explanations_for_Deep_Convolutional_Network_via_Gradient-free_Localization_WACV_2020_paper.pdf
|
12 |
-
|
13 |
-
Ablate individual activations, and then measure the drop in the target score.
|
14 |
-
|
15 |
-
In the current implementation, the target layer activations is cached, so it won't be re-computed.
|
16 |
-
However layers before it, if any, will not be cached.
|
17 |
-
This means that if the target layer is a large block, for example model.featuers (in vgg), there will
|
18 |
-
be a large save in run time.
|
19 |
-
|
20 |
-
Since we have to go over many channels and ablate them, and every channel ablation requires a forward pass,
|
21 |
-
it would be nice if we could avoid doing that for channels that won't contribute anwyay, making it much faster.
|
22 |
-
The parameter ratio_channels_to_ablate controls how many channels should be ablated, using an experimental method
|
23 |
-
(to be improved). The default 1.0 value means that all channels will be ablated.
|
24 |
-
"""
|
25 |
-
|
26 |
-
|
27 |
-
class AblationCAM(BaseCAM):
|
28 |
-
def __init__(self,
|
29 |
-
model: torch.nn.Module,
|
30 |
-
target_layers: List[torch.nn.Module],
|
31 |
-
use_cuda: bool = False,
|
32 |
-
reshape_transform: Callable = None,
|
33 |
-
ablation_layer: torch.nn.Module = AblationLayer(),
|
34 |
-
batch_size: int = 32,
|
35 |
-
ratio_channels_to_ablate: float = 1.0) -> None:
|
36 |
-
|
37 |
-
super(AblationCAM, self).__init__(model,
|
38 |
-
target_layers,
|
39 |
-
use_cuda,
|
40 |
-
reshape_transform,
|
41 |
-
uses_gradients=False)
|
42 |
-
self.batch_size = batch_size
|
43 |
-
self.ablation_layer = ablation_layer
|
44 |
-
self.ratio_channels_to_ablate = ratio_channels_to_ablate
|
45 |
-
|
46 |
-
def save_activation(self, module, input, output) -> None:
|
47 |
-
""" Helper function to save the raw activations from the target layer """
|
48 |
-
self.activations = output
|
49 |
-
|
50 |
-
def assemble_ablation_scores(self,
|
51 |
-
new_scores: list,
|
52 |
-
original_score: float,
|
53 |
-
ablated_channels: np.ndarray,
|
54 |
-
number_of_channels: int) -> np.ndarray:
|
55 |
-
""" Take the value from the channels that were ablated,
|
56 |
-
and just set the original score for the channels that were skipped """
|
57 |
-
|
58 |
-
index = 0
|
59 |
-
result = []
|
60 |
-
sorted_indices = np.argsort(ablated_channels)
|
61 |
-
ablated_channels = ablated_channels[sorted_indices]
|
62 |
-
new_scores = np.float32(new_scores)[sorted_indices]
|
63 |
-
|
64 |
-
for i in range(number_of_channels):
|
65 |
-
if index < len(ablated_channels) and ablated_channels[index] == i:
|
66 |
-
weight = new_scores[index]
|
67 |
-
index = index + 1
|
68 |
-
else:
|
69 |
-
weight = original_score
|
70 |
-
result.append(weight)
|
71 |
-
|
72 |
-
return result
|
73 |
-
|
74 |
-
def get_cam_weights(self,
|
75 |
-
input_tensor: torch.Tensor,
|
76 |
-
target_layer: torch.nn.Module,
|
77 |
-
targets: List[Callable],
|
78 |
-
activations: torch.Tensor,
|
79 |
-
grads: torch.Tensor) -> np.ndarray:
|
80 |
-
|
81 |
-
# Do a forward pass, compute the target scores, and cache the
|
82 |
-
# activations
|
83 |
-
handle = target_layer.register_forward_hook(self.save_activation)
|
84 |
-
with torch.no_grad():
|
85 |
-
outputs = self.model(input_tensor)
|
86 |
-
handle.remove()
|
87 |
-
original_scores = np.float32(
|
88 |
-
[target(output).cpu().item() for target, output in zip(targets, outputs)])
|
89 |
-
|
90 |
-
# Replace the layer with the ablation layer.
|
91 |
-
# When we finish, we will replace it back, so the original model is
|
92 |
-
# unchanged.
|
93 |
-
ablation_layer = self.ablation_layer
|
94 |
-
replace_layer_recursive(self.model, target_layer, ablation_layer)
|
95 |
-
|
96 |
-
number_of_channels = activations.shape[1]
|
97 |
-
weights = []
|
98 |
-
# This is a "gradient free" method, so we don't need gradients here.
|
99 |
-
with torch.no_grad():
|
100 |
-
# Loop over each of the batch images and ablate activations for it.
|
101 |
-
for batch_index, (target, tensor) in enumerate(
|
102 |
-
zip(targets, input_tensor)):
|
103 |
-
new_scores = []
|
104 |
-
batch_tensor = tensor.repeat(self.batch_size, 1, 1, 1)
|
105 |
-
|
106 |
-
# Check which channels should be ablated. Normally this will be all channels,
|
107 |
-
# But we can also try to speed this up by using a low
|
108 |
-
# ratio_channels_to_ablate.
|
109 |
-
channels_to_ablate = ablation_layer.activations_to_be_ablated(
|
110 |
-
activations[batch_index, :], self.ratio_channels_to_ablate)
|
111 |
-
number_channels_to_ablate = len(channels_to_ablate)
|
112 |
-
|
113 |
-
for i in tqdm.tqdm(
|
114 |
-
range(
|
115 |
-
0,
|
116 |
-
number_channels_to_ablate,
|
117 |
-
self.batch_size)):
|
118 |
-
if i + self.batch_size > number_channels_to_ablate:
|
119 |
-
batch_tensor = batch_tensor[:(
|
120 |
-
number_channels_to_ablate - i)]
|
121 |
-
|
122 |
-
# Change the state of the ablation layer so it ablates the next channels.
|
123 |
-
# TBD: Move this into the ablation layer forward pass.
|
124 |
-
ablation_layer.set_next_batch(
|
125 |
-
input_batch_index=batch_index,
|
126 |
-
activations=self.activations,
|
127 |
-
num_channels_to_ablate=batch_tensor.size(0))
|
128 |
-
score = [target(o).cpu().item()
|
129 |
-
for o in self.model(batch_tensor)]
|
130 |
-
new_scores.extend(score)
|
131 |
-
ablation_layer.indices = ablation_layer.indices[batch_tensor.size(
|
132 |
-
0):]
|
133 |
-
|
134 |
-
new_scores = self.assemble_ablation_scores(
|
135 |
-
new_scores,
|
136 |
-
original_scores[batch_index],
|
137 |
-
channels_to_ablate,
|
138 |
-
number_of_channels)
|
139 |
-
weights.extend(new_scores)
|
140 |
-
|
141 |
-
weights = np.float32(weights)
|
142 |
-
weights = weights.reshape(activations.shape[:2])
|
143 |
-
original_scores = original_scores[:, None]
|
144 |
-
weights = (original_scores - weights) / original_scores
|
145 |
-
|
146 |
-
# Replace the model back to the original state
|
147 |
-
replace_layer_recursive(self.model, ablation_layer, target_layer)
|
148 |
-
return weights
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/ablation_cam_multilayer.py
DELETED
@@ -1,136 +0,0 @@
|
|
1 |
-
import cv2
|
2 |
-
import numpy as np
|
3 |
-
import torch
|
4 |
-
import tqdm
|
5 |
-
from custom_grad_cam.base_cam import BaseCAM
|
6 |
-
|
7 |
-
|
8 |
-
class AblationLayer(torch.nn.Module):
|
9 |
-
def __init__(self, layer, reshape_transform, indices):
|
10 |
-
super(AblationLayer, self).__init__()
|
11 |
-
|
12 |
-
self.layer = layer
|
13 |
-
self.reshape_transform = reshape_transform
|
14 |
-
# The channels to zero out:
|
15 |
-
self.indices = indices
|
16 |
-
|
17 |
-
def forward(self, x):
|
18 |
-
self.__call__(x)
|
19 |
-
|
20 |
-
def __call__(self, x):
|
21 |
-
output = self.layer(x)
|
22 |
-
|
23 |
-
# Hack to work with ViT,
|
24 |
-
# Since the activation channels are last and not first like in CNNs
|
25 |
-
# Probably should remove it?
|
26 |
-
if self.reshape_transform is not None:
|
27 |
-
output = output.transpose(1, 2)
|
28 |
-
|
29 |
-
for i in range(output.size(0)):
|
30 |
-
|
31 |
-
# Commonly the minimum activation will be 0,
|
32 |
-
# And then it makes sense to zero it out.
|
33 |
-
# However depending on the architecture,
|
34 |
-
# If the values can be negative, we use very negative values
|
35 |
-
# to perform the ablation, deviating from the paper.
|
36 |
-
if torch.min(output) == 0:
|
37 |
-
output[i, self.indices[i], :] = 0
|
38 |
-
else:
|
39 |
-
ABLATION_VALUE = 1e5
|
40 |
-
output[i, self.indices[i], :] = torch.min(
|
41 |
-
output) - ABLATION_VALUE
|
42 |
-
|
43 |
-
if self.reshape_transform is not None:
|
44 |
-
output = output.transpose(2, 1)
|
45 |
-
|
46 |
-
return output
|
47 |
-
|
48 |
-
|
49 |
-
def replace_layer_recursive(model, old_layer, new_layer):
|
50 |
-
for name, layer in model._modules.items():
|
51 |
-
if layer == old_layer:
|
52 |
-
model._modules[name] = new_layer
|
53 |
-
return True
|
54 |
-
elif replace_layer_recursive(layer, old_layer, new_layer):
|
55 |
-
return True
|
56 |
-
return False
|
57 |
-
|
58 |
-
|
59 |
-
class AblationCAM(BaseCAM):
|
60 |
-
def __init__(self, model, target_layers, use_cuda=False,
|
61 |
-
reshape_transform=None):
|
62 |
-
super(AblationCAM, self).__init__(model, target_layers, use_cuda,
|
63 |
-
reshape_transform)
|
64 |
-
|
65 |
-
if len(target_layers) > 1:
|
66 |
-
print(
|
67 |
-
"Warning. You are usign Ablation CAM with more than 1 layers. "
|
68 |
-
"This is supported only if all layers have the same output shape")
|
69 |
-
|
70 |
-
def set_ablation_layers(self):
|
71 |
-
self.ablation_layers = []
|
72 |
-
for target_layer in self.target_layers:
|
73 |
-
ablation_layer = AblationLayer(target_layer,
|
74 |
-
self.reshape_transform, indices=[])
|
75 |
-
self.ablation_layers.append(ablation_layer)
|
76 |
-
replace_layer_recursive(self.model, target_layer, ablation_layer)
|
77 |
-
|
78 |
-
def unset_ablation_layers(self):
|
79 |
-
# replace the model back to the original state
|
80 |
-
for ablation_layer, target_layer in zip(
|
81 |
-
self.ablation_layers, self.target_layers):
|
82 |
-
replace_layer_recursive(self.model, ablation_layer, target_layer)
|
83 |
-
|
84 |
-
def set_ablation_layer_batch_indices(self, indices):
|
85 |
-
for ablation_layer in self.ablation_layers:
|
86 |
-
ablation_layer.indices = indices
|
87 |
-
|
88 |
-
def trim_ablation_layer_batch_indices(self, keep):
|
89 |
-
for ablation_layer in self.ablation_layers:
|
90 |
-
ablation_layer.indices = ablation_layer.indices[:keep]
|
91 |
-
|
92 |
-
def get_cam_weights(self,
|
93 |
-
input_tensor,
|
94 |
-
target_category,
|
95 |
-
activations,
|
96 |
-
grads):
|
97 |
-
with torch.no_grad():
|
98 |
-
outputs = self.model(input_tensor).cpu().numpy()
|
99 |
-
original_scores = []
|
100 |
-
for i in range(input_tensor.size(0)):
|
101 |
-
original_scores.append(outputs[i, target_category[i]])
|
102 |
-
original_scores = np.float32(original_scores)
|
103 |
-
|
104 |
-
self.set_ablation_layers()
|
105 |
-
|
106 |
-
if hasattr(self, "batch_size"):
|
107 |
-
BATCH_SIZE = self.batch_size
|
108 |
-
else:
|
109 |
-
BATCH_SIZE = 32
|
110 |
-
|
111 |
-
number_of_channels = activations.shape[1]
|
112 |
-
weights = []
|
113 |
-
|
114 |
-
with torch.no_grad():
|
115 |
-
# Iterate over the input batch
|
116 |
-
for tensor, category in zip(input_tensor, target_category):
|
117 |
-
batch_tensor = tensor.repeat(BATCH_SIZE, 1, 1, 1)
|
118 |
-
for i in tqdm.tqdm(range(0, number_of_channels, BATCH_SIZE)):
|
119 |
-
self.set_ablation_layer_batch_indices(
|
120 |
-
list(range(i, i + BATCH_SIZE)))
|
121 |
-
|
122 |
-
if i + BATCH_SIZE > number_of_channels:
|
123 |
-
keep = number_of_channels - i
|
124 |
-
batch_tensor = batch_tensor[:keep]
|
125 |
-
self.trim_ablation_layer_batch_indices(self, keep)
|
126 |
-
score = self.model(batch_tensor)[:, category].cpu().numpy()
|
127 |
-
weights.extend(score)
|
128 |
-
|
129 |
-
weights = np.float32(weights)
|
130 |
-
weights = weights.reshape(activations.shape[:2])
|
131 |
-
original_scores = original_scores[:, None]
|
132 |
-
weights = (original_scores - weights) / original_scores
|
133 |
-
|
134 |
-
# replace the model back to the original state
|
135 |
-
self.unset_ablation_layers()
|
136 |
-
return weights
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/ablation_layer.py
DELETED
@@ -1,155 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
from collections import OrderedDict
|
3 |
-
import numpy as np
|
4 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
5 |
-
|
6 |
-
|
7 |
-
class AblationLayer(torch.nn.Module):
|
8 |
-
def __init__(self):
|
9 |
-
super(AblationLayer, self).__init__()
|
10 |
-
|
11 |
-
def objectiveness_mask_from_svd(self, activations, threshold=0.01):
|
12 |
-
""" Experimental method to get a binary mask to compare if the activation is worth ablating.
|
13 |
-
The idea is to apply the EigenCAM method by doing PCA on the activations.
|
14 |
-
Then we create a binary mask by comparing to a low threshold.
|
15 |
-
Areas that are masked out, are probably not interesting anyway.
|
16 |
-
"""
|
17 |
-
|
18 |
-
projection = get_2d_projection(activations[None, :])[0, :]
|
19 |
-
projection = np.abs(projection)
|
20 |
-
projection = projection - projection.min()
|
21 |
-
projection = projection / projection.max()
|
22 |
-
projection = projection > threshold
|
23 |
-
return projection
|
24 |
-
|
25 |
-
def activations_to_be_ablated(
|
26 |
-
self,
|
27 |
-
activations,
|
28 |
-
ratio_channels_to_ablate=1.0):
|
29 |
-
""" Experimental method to get a binary mask to compare if the activation is worth ablating.
|
30 |
-
Create a binary CAM mask with objectiveness_mask_from_svd.
|
31 |
-
Score each Activation channel, by seeing how much of its values are inside the mask.
|
32 |
-
Then keep the top channels.
|
33 |
-
|
34 |
-
"""
|
35 |
-
if ratio_channels_to_ablate == 1.0:
|
36 |
-
self.indices = np.int32(range(activations.shape[0]))
|
37 |
-
return self.indices
|
38 |
-
|
39 |
-
projection = self.objectiveness_mask_from_svd(activations)
|
40 |
-
|
41 |
-
scores = []
|
42 |
-
for channel in activations:
|
43 |
-
normalized = np.abs(channel)
|
44 |
-
normalized = normalized - normalized.min()
|
45 |
-
normalized = normalized / np.max(normalized)
|
46 |
-
score = (projection * normalized).sum() / normalized.sum()
|
47 |
-
scores.append(score)
|
48 |
-
scores = np.float32(scores)
|
49 |
-
|
50 |
-
indices = list(np.argsort(scores))
|
51 |
-
high_score_indices = indices[::-
|
52 |
-
1][: int(len(indices) *
|
53 |
-
ratio_channels_to_ablate)]
|
54 |
-
low_score_indices = indices[: int(
|
55 |
-
len(indices) * ratio_channels_to_ablate)]
|
56 |
-
self.indices = np.int32(high_score_indices + low_score_indices)
|
57 |
-
return self.indices
|
58 |
-
|
59 |
-
def set_next_batch(
|
60 |
-
self,
|
61 |
-
input_batch_index,
|
62 |
-
activations,
|
63 |
-
num_channels_to_ablate):
|
64 |
-
""" This creates the next batch of activations from the layer.
|
65 |
-
Just take corresponding batch member from activations, and repeat it num_channels_to_ablate times.
|
66 |
-
"""
|
67 |
-
self.activations = activations[input_batch_index, :, :, :].clone(
|
68 |
-
).unsqueeze(0).repeat(num_channels_to_ablate, 1, 1, 1)
|
69 |
-
|
70 |
-
def __call__(self, x):
|
71 |
-
output = self.activations
|
72 |
-
for i in range(output.size(0)):
|
73 |
-
# Commonly the minimum activation will be 0,
|
74 |
-
# And then it makes sense to zero it out.
|
75 |
-
# However depending on the architecture,
|
76 |
-
# If the values can be negative, we use very negative values
|
77 |
-
# to perform the ablation, deviating from the paper.
|
78 |
-
if torch.min(output) == 0:
|
79 |
-
output[i, self.indices[i], :] = 0
|
80 |
-
else:
|
81 |
-
ABLATION_VALUE = 1e7
|
82 |
-
output[i, self.indices[i], :] = torch.min(
|
83 |
-
output) - ABLATION_VALUE
|
84 |
-
|
85 |
-
return output
|
86 |
-
|
87 |
-
|
88 |
-
class AblationLayerVit(AblationLayer):
|
89 |
-
def __init__(self):
|
90 |
-
super(AblationLayerVit, self).__init__()
|
91 |
-
|
92 |
-
def __call__(self, x):
|
93 |
-
output = self.activations
|
94 |
-
output = output.transpose(1, len(output.shape) - 1)
|
95 |
-
for i in range(output.size(0)):
|
96 |
-
|
97 |
-
# Commonly the minimum activation will be 0,
|
98 |
-
# And then it makes sense to zero it out.
|
99 |
-
# However depending on the architecture,
|
100 |
-
# If the values can be negative, we use very negative values
|
101 |
-
# to perform the ablation, deviating from the paper.
|
102 |
-
if torch.min(output) == 0:
|
103 |
-
output[i, self.indices[i], :] = 0
|
104 |
-
else:
|
105 |
-
ABLATION_VALUE = 1e7
|
106 |
-
output[i, self.indices[i], :] = torch.min(
|
107 |
-
output) - ABLATION_VALUE
|
108 |
-
|
109 |
-
output = output.transpose(len(output.shape) - 1, 1)
|
110 |
-
|
111 |
-
return output
|
112 |
-
|
113 |
-
def set_next_batch(
|
114 |
-
self,
|
115 |
-
input_batch_index,
|
116 |
-
activations,
|
117 |
-
num_channels_to_ablate):
|
118 |
-
""" This creates the next batch of activations from the layer.
|
119 |
-
Just take corresponding batch member from activations, and repeat it num_channels_to_ablate times.
|
120 |
-
"""
|
121 |
-
repeat_params = [num_channels_to_ablate] + \
|
122 |
-
len(activations.shape[:-1]) * [1]
|
123 |
-
self.activations = activations[input_batch_index, :, :].clone(
|
124 |
-
).unsqueeze(0).repeat(*repeat_params)
|
125 |
-
|
126 |
-
|
127 |
-
class AblationLayerFasterRCNN(AblationLayer):
    """Ablation layer operating on a dict of feature maps.

    The activations are kept as an ``OrderedDict`` keyed by level name
    ('0', '1', '2', '3', 'pool'); ``__call__`` overwrites one channel per
    batch row with a large negative constant.
    """

    def __init__(self):
        super().__init__()

    def set_next_batch(
            self,
            input_batch_index,
            activations,
            num_channels_to_ablate):
        """Select one batch member from every entry and tile it.

        For each ``key, value`` pair in ``activations`` the slice
        ``value[input_batch_index]`` is cloned and repeated
        ``num_channels_to_ablate`` times along a new leading dimension.
        """
        self.activations = tiled = OrderedDict()
        for level_name, level_tensor in activations.items():
            single = level_tensor[input_batch_index, :, :, :].clone()
            single = single.unsqueeze(0)
            tiled[level_name] = single.repeat(
                num_channels_to_ablate, 1, 1, 1)

    def __call__(self, x):
        """Return the stored activations with one channel ablated per row.

        ``x`` is ignored; the stored ``self.activations`` dict is modified
        in place and returned.
        """
        ablated = self.activations
        level_names = {0: '0', 1: '1', 2: '2', 3: '3', 4: 'pool'}
        rows = ablated['pool'].size(0)
        for row in range(rows):
            flat_index = self.indices[row]
            # The flat channel index is split into (level, channel) using a
            # fixed width of 256 channels per level.
            level = int(flat_index / 256)
            channel = int(flat_index % 256)
            ablated[level_names[level]][row, channel, :, :] = -1000
        return ablated
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/activations_and_gradients.py
DELETED
@@ -1,46 +0,0 @@
|
|
1 |
-
class ActivationsAndGradients:
    """Record activations and gradients of selected intermediate layers.

    Forward hooks are registered on every target layer. One hook stores the
    layer output; the other attaches a tensor hook to that output so the
    gradient flowing into it is stored during the backward pass.
    """

    def __init__(self, model, target_layers, reshape_transform):
        self.model = model
        self.gradients = []
        self.activations = []
        self.reshape_transform = reshape_transform
        self.handles = []
        for layer in target_layers:
            # Because of https://github.com/pytorch/pytorch/issues/61519,
            # gradients are captured through a forward hook as well instead
            # of a module backward hook.
            for hook in (self.save_activation, self.save_gradient):
                self.handles.append(layer.register_forward_hook(hook))

    def save_activation(self, module, input, output):
        """Forward hook: store the (optionally reshaped) layer output."""
        transform = self.reshape_transform
        activation = output if transform is None else transform(output)
        self.activations.append(activation.cpu().detach())

    def save_gradient(self, module, input, output):
        """Forward hook: arrange for the gradient of ``output`` to be stored."""
        # Tensor hooks can only be registered on tensors that require grad.
        if not getattr(output, "requires_grad", False):
            return

        def _store_grad(grad):
            transform = self.reshape_transform
            if transform is not None:
                grad = transform(grad)
            # Gradients arrive in reverse layer order, so prepend.
            self.gradients = [grad.cpu().detach(), *self.gradients]

        output.register_hook(_store_grad)

    def __call__(self, x):
        """Reset the recorded state and run the wrapped model on ``x``."""
        self.gradients = []
        self.activations = []
        return self.model(x)

    def release(self):
        """Remove every hook registered by this object."""
        for hook_handle in self.handles:
            hook_handle.remove()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/base_cam.py
DELETED
@@ -1,205 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import torch
|
3 |
-
import ttach as tta
|
4 |
-
from typing import Callable, List, Tuple
|
5 |
-
from custom_grad_cam.activations_and_gradients import ActivationsAndGradients
|
6 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
7 |
-
from custom_grad_cam.utils.image import scale_cam_image
|
8 |
-
from custom_grad_cam.utils.model_targets import ClassifierOutputTarget
|
9 |
-
|
10 |
-
|
11 |
-
class BaseCAM:
    """Common pipeline shared by the CAM-style saliency methods.

    The object wraps a model and a list of target layers, records their
    activations (and gradients) through ``ActivationsAndGradients`` and
    turns them into one saliency map per input image. Subclasses normally
    override ``get_cam_weights`` or ``get_cam_image``.
    """

    def __init__(self,
                 model: torch.nn.Module,
                 target_layers: List[torch.nn.Module],
                 use_cuda: bool = False,
                 reshape_transform: Callable = None,
                 compute_input_gradient: bool = False,
                 uses_gradients: bool = True) -> None:
        """Put the model in eval mode and register the recording hooks.

        :param model: The model to explain.
        :param target_layers: Layers whose outputs are used for the CAM.
        :param use_cuda: When true, the model and inputs are moved to CUDA.
        :param reshape_transform: Optional callable applied to recorded
            activations and gradients.
        :param compute_input_gradient: When true, the input tensor is made
            to require grad before the forward pass.
        :param uses_gradients: When true, a backward pass is run in
            ``forward``.
        """
        self.model = model.eval()
        self.target_layers = target_layers
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self.reshape_transform = reshape_transform
        self.compute_input_gradient = compute_input_gradient
        self.uses_gradients = uses_gradients
        self.activations_and_grads = ActivationsAndGradients(
            self.model, target_layers, reshape_transform)
        self.outputs = None

    def get_cam_weights(self,
                        input_tensor: torch.Tensor,
                        target_layers: List[torch.nn.Module],
                        targets: List[torch.nn.Module],
                        activations: torch.Tensor,
                        grads: torch.Tensor) -> np.ndarray:
        """Get a vector of weights for every channel in the target layer.

        Methods that return weights channels will typically need to only
        implement this function.

        :raises NotImplementedError: Always; subclasses must override.
        """
        # NotImplementedError is a subclass of Exception, so callers that
        # caught the previous bare Exception keep working.
        raise NotImplementedError("Not Implemented")

    def get_cam_image(self,
                      input_tensor: torch.Tensor,
                      target_layer: torch.nn.Module,
                      targets: List[torch.nn.Module],
                      activations: torch.Tensor,
                      grads: torch.Tensor,
                      eigen_smooth: bool = False) -> np.ndarray:
        """Combine the activations of one layer into a CAM.

        The channel weights from ``get_cam_weights`` are broadcast over the
        two trailing axes and multiplied with the activations. The weighted
        activations are then either projected with ``get_2d_projection``
        (``eigen_smooth``) or summed over axis 1.
        """
        weights = self.get_cam_weights(input_tensor,
                                       target_layer,
                                       targets,
                                       activations,
                                       grads)
        weighted_activations = weights[:, :, None, None] * activations
        if eigen_smooth:
            cam = get_2d_projection(weighted_activations)
        else:
            cam = weighted_activations.sum(axis=1)
        return cam

    def forward(self,
                input_tensor: torch.Tensor,
                targets: List[torch.nn.Module],
                eigen_smooth: bool = False) -> np.ndarray:
        """Run the model and compute the aggregated CAM for ``input_tensor``.

        When ``targets`` is None, one ``ClassifierOutputTarget`` is created
        per batch element from the argmax of the model output. The raw
        model output is kept on ``self.outputs``.
        """
        if self.cuda:
            input_tensor = input_tensor.cuda()

        if self.compute_input_gradient:
            input_tensor = torch.autograd.Variable(input_tensor,
                                                   requires_grad=True)

        outputs = self.activations_and_grads(input_tensor)
        self.outputs = outputs
        if targets is None:
            target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1)
            targets = [ClassifierOutputTarget(
                category) for category in target_categories]

        if self.uses_gradients:
            self.model.zero_grad()
            loss = sum([target(output)
                       for target, output in zip(targets, outputs)])
            loss.backward(retain_graph=True)

        # In most of the saliency attribution papers, the saliency is
        # computed with a single target layer.
        # Commonly it is the last convolutional layer.
        # Here we support passing a list with multiple target layers.
        # It will compute the saliency image for every image,
        # and then aggregate them (with a default mean aggregation).
        # This gives you more flexibility in case you just want to
        # use all conv layers for example, all Batchnorm layers,
        # or something else.
        cam_per_layer = self.compute_cam_per_layer(input_tensor,
                                                   targets,
                                                   eigen_smooth)
        return self.aggregate_multi_layers(cam_per_layer)

    def get_target_width_height(self,
                                input_tensor: torch.Tensor) -> Tuple[int, int]:
        """Return ``(width, height)`` read from the last two tensor axes."""
        width, height = input_tensor.size(-1), input_tensor.size(-2)
        return width, height

    def compute_cam_per_layer(
            self,
            input_tensor: torch.Tensor,
            targets: List[torch.nn.Module],
            eigen_smooth: bool) -> np.ndarray:
        """Compute one rescaled, non-negative CAM per target layer.

        :returns: A list with one array per target layer; each array has a
            singleton axis inserted at position 1 so the list can be
            concatenated along that axis.
        """
        activations_list = [a.cpu().data.numpy()
                            for a in self.activations_and_grads.activations]
        grads_list = [g.cpu().data.numpy()
                      for g in self.activations_and_grads.gradients]
        target_size = self.get_target_width_height(input_tensor)

        cam_per_target_layer = []
        # Loop over the saliency image from every layer
        for i, target_layer in enumerate(self.target_layers):
            # Fewer recordings than layers are possible (for example no
            # gradients when uses_gradients is False); pass None then.
            layer_activations = (
                activations_list[i] if i < len(activations_list) else None)
            layer_grads = grads_list[i] if i < len(grads_list) else None

            cam = self.get_cam_image(input_tensor,
                                     target_layer,
                                     targets,
                                     layer_activations,
                                     layer_grads,
                                     eigen_smooth)
            cam = np.maximum(cam, 0)
            scaled = scale_cam_image(cam, target_size)
            cam_per_target_layer.append(scaled[:, None, :])

        return cam_per_target_layer

    def aggregate_multi_layers(
            self,
            cam_per_target_layer: np.ndarray) -> np.ndarray:
        """Average the per-layer CAMs over axis 1 and rescale the result."""
        cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
        cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
        result = np.mean(cam_per_target_layer, axis=1)
        return scale_cam_image(result)

    def forward_augmentation_smoothing(self,
                                       input_tensor: torch.Tensor,
                                       targets: List[torch.nn.Module],
                                       eigen_smooth: bool = False) -> np.ndarray:
        """Average the CAMs obtained under test-time augmentations.

        The augmentations are a horizontal flip combined with
        multiplication by 0.9, 1 and 1.1; each CAM is de-augmented before
        averaging.
        """
        transforms = tta.Compose(
            [
                tta.HorizontalFlip(),
                tta.Multiply(factors=[0.9, 1, 1.1]),
            ]
        )
        cams = []
        for transform in transforms:
            augmented_tensor = transform.augment_image(input_tensor)
            cam = self.forward(augmented_tensor,
                               targets,
                               eigen_smooth)

            # The ttach library expects a tensor of size BxCxHxW
            cam = cam[:, None, :, :]
            cam = torch.from_numpy(cam)
            cam = transform.deaugment_mask(cam)

            # Back to numpy float32, HxW
            cam = cam.numpy()
            cam = cam[:, 0, :, :]
            cams.append(cam)

        cam = np.mean(np.float32(cams), axis=0)
        return cam

    def __call__(self,
                 input_tensor: torch.Tensor,
                 targets: List[torch.nn.Module] = None,
                 aug_smooth: bool = False,
                 eigen_smooth: bool = False) -> np.ndarray:
        """Compute the CAM, optionally smoothed by test-time augmentation."""
        # Smooth the CAM result with test time augmentation
        if aug_smooth is True:
            return self.forward_augmentation_smoothing(
                input_tensor, targets, eigen_smooth)

        return self.forward(input_tensor,
                            targets, eigen_smooth)

    def __del__(self):
        # __init__ may have raised before the hooks object was created
        # (for example inside model.cuda()); nothing to release then.
        recorder = getattr(self, "activations_and_grads", None)
        if recorder is not None:
            recorder.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Release the hooks; an IndexError is reported and suppressed."""
        self.activations_and_grads.release()
        if isinstance(exc_value, IndexError):
            # Handle IndexError here...
            print(
                f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}")
            return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/eigen_cam.py
DELETED
@@ -1,23 +0,0 @@
|
|
1 |
-
from custom_grad_cam.base_cam import BaseCAM
|
2 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
3 |
-
|
4 |
-
# https://arxiv.org/abs/2008.00299
|
5 |
-
|
6 |
-
|
7 |
-
class EigenCAM(BaseCAM):
    """CAM built only from activations (no gradients are requested).

    The CAM image is the result of ``get_2d_projection`` applied to the
    layer activations.
    """

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(
            model,
            target_layers,
            use_cuda,
            reshape_transform,
            uses_gradients=False,
        )

    def get_cam_image(self,
                      input_tensor,
                      target_layer,
                      target_category,
                      activations,
                      grads,
                      eigen_smooth):
        """Project the activations; every other argument is unused."""
        projection = get_2d_projection(activations)
        return projection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/eigen_grad_cam.py
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
from custom_grad_cam.base_cam import BaseCAM
|
2 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
3 |
-
|
4 |
-
# Like Eigen CAM: https://arxiv.org/abs/2008.00299
|
5 |
-
# But multiply the activations x gradients
|
6 |
-
|
7 |
-
|
8 |
-
class EigenGradCAM(BaseCAM):
    """Like EigenCAM, but the activations are multiplied by the gradients
    before the projection."""

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(
            model,
            target_layers,
            use_cuda,
            reshape_transform,
        )

    def get_cam_image(self,
                      input_tensor,
                      target_layer,
                      target_category,
                      activations,
                      grads,
                      eigen_smooth):
        """Project the element-wise product ``grads * activations``."""
        gradient_weighted = grads * activations
        return get_2d_projection(gradient_weighted)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/feature_factorization/__init__.py
DELETED
File without changes
|
src/custom_code/custom_grad_cam/feature_factorization/deep_feature_factorization.py
DELETED
@@ -1,131 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from PIL import Image
|
3 |
-
import torch
|
4 |
-
from typing import Callable, List, Tuple, Optional
|
5 |
-
from sklearn.decomposition import NMF
|
6 |
-
from custom_grad_cam.activations_and_gradients import ActivationsAndGradients
|
7 |
-
from custom_grad_cam.utils.image import scale_cam_image, create_labels_legend, show_factorization_on_image
|
8 |
-
|
9 |
-
|
10 |
-
def dff(activations: np.ndarray, n_components: int = 5):
    """ Compute Deep Feature Factorization on a 2d Activations tensor.

    The input array is left untouched: NaN values are zeroed on a private
    copy before the factorization.

    :param activations: A numpy array of shape batch x channels x height x width
    :param n_components: The number of components for the non negative matrix factorization
    :returns: A tuple of the concepts (a numpy array with shape channels x components),
              and the explanation heatmaps (a numpy array with shape
              batch x components x height x width)
    """

    batch_size, channels, h, w = activations.shape
    # transpose() returns a view; copy it so the NaN scrubbing below does
    # not write through into the caller's array.
    reshaped_activations = activations.transpose((1, 0, 2, 3)).copy()
    reshaped_activations[np.isnan(reshaped_activations)] = 0
    reshaped_activations = reshaped_activations.reshape(channels, -1)
    # Shift every channel so its minimum is 0, making the matrix
    # non-negative as required by NMF.
    offset = reshaped_activations.min(axis=-1)
    reshaped_activations = reshaped_activations - offset[:, None]

    model = NMF(n_components=n_components, init='random', random_state=0)
    W = model.fit_transform(reshaped_activations)
    H = model.components_
    concepts = W + offset[:, None]
    explanations = H.reshape(n_components, batch_size, h, w)
    explanations = explanations.transpose((1, 0, 2, 3))
    return concepts, explanations
|
34 |
-
|
35 |
-
|
36 |
-
class DeepFeatureFactorization:
    """ Deep Feature Factorization: https://arxiv.org/abs/1806.10206
        This gets a model and computes the 2D activations for a target layer,
        and computes Non Negative Matrix Factorization on the activations.

        Optionally it runs a computation on the concept embeddings,
        like running a classifier on them.

        The explanation heatmaps are scaled with ``scale_cam_image`` to the
        input tensor width and height.

        The object can be used as a context manager; leaving the ``with``
        block removes the hooks registered on the model.
    """

    def __init__(self,
                 model: torch.nn.Module,
                 target_layer: torch.nn.Module,
                 reshape_transform: Callable = None,
                 computation_on_concepts=None
                 ):
        """Register the activation hooks on ``target_layer``.

        :param model: The model to run.
        :param target_layer: Layer whose activations are factorized.
        :param reshape_transform: Optional callable applied to the
            recorded activations.
        :param computation_on_concepts: Optional callable run on the
            concept embeddings (for example a classifier head).
        """
        self.model = model
        self.computation_on_concepts = computation_on_concepts
        self.activations_and_grads = ActivationsAndGradients(
            self.model, [target_layer], reshape_transform)

    def __call__(self,
                 input_tensor: torch.Tensor,
                 n_components: int = 16):
        """Factorize the target-layer activations for ``input_tensor``.

        :param input_tensor: A 4-dimensional tensor; its last two sizes are
            used as the height and width the heatmaps are scaled to.
        :param n_components: Number of NMF components.
        :returns: ``(concepts, processed_explanations)``, plus a third
            element ``concept_outputs`` when ``computation_on_concepts``
            was given.
        """
        # The unpacking also enforces that the input is 4-dimensional.
        _batch_size, _channels, h, w = input_tensor.size()
        _ = self.activations_and_grads(input_tensor)

        with torch.no_grad():
            activations = self.activations_and_grads.activations[0].cpu(
            ).numpy()

        concepts, explanations = dff(activations, n_components=n_components)

        processed_explanations = [
            scale_cam_image(batch, (w, h)) for batch in explanations]

        if self.computation_on_concepts:
            with torch.no_grad():
                concept_tensors = torch.from_numpy(
                    np.float32(concepts).transpose((1, 0)))
                concept_outputs = self.computation_on_concepts(
                    concept_tensors).cpu().numpy()
            return concepts, processed_explanations, concept_outputs
        else:
            return concepts, processed_explanations

    def __del__(self):
        # __init__ may have failed before the hooks object existed.
        recorder = getattr(self, "activations_and_grads", None)
        if recorder is not None:
            recorder.release()

    def __enter__(self):
        # Without __enter__ the existing __exit__ could never be reached
        # through a ``with`` statement.
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Release the hooks; an IndexError is reported and suppressed."""
        self.activations_and_grads.release()
        if isinstance(exc_value, IndexError):
            # Handle IndexError here...
            print(
                f"An exception occurred in ActivationSummary with block: {exc_type}. Message: {exc_value}")
            return True
|
96 |
-
|
97 |
-
|
98 |
-
def run_dff_on_image(model: torch.nn.Module,
                     target_layer: torch.nn.Module,
                     classifier: torch.nn.Module,
                     img_pil: "Image.Image",
                     img_tensor: torch.Tensor,
                     reshape_transform: Optional[Callable] = None,
                     n_components: int = 5,
                     top_k: int = 2) -> np.ndarray:
    """ Helper function to create a Deep Feature Factorization visualization for a single image.
        TBD: Run this on a batch with several images.

    :param model: The model to explain; ``model.config.id2label`` is used
        to name the categories.
    :param target_layer: Layer whose activations are factorized.
    :param classifier: Callable run on the concept embeddings; its output
        is passed through a softmax over the last axis.
    :param img_pil: The image; it is converted with ``np.array`` for the
        visualization.
    :param img_tensor: The preprocessed image tensor without a batch axis
        (a batch axis is added here).
    :param reshape_transform: Optional callable applied to the recorded
        activations. Defaults to None (no transform).
    :param n_components: Number of NMF components.
    :param top_k: Number of categories listed per concept in the legend.
    :returns: The original image and the visualization stacked
        horizontally.
    """
    rgb_img_float = np.array(img_pil) / 255
    # Named differently from the module-level ``dff`` function to avoid
    # shadowing it inside this function.
    factorizer = DeepFeatureFactorization(model=model,
                                          reshape_transform=reshape_transform,
                                          target_layer=target_layer,
                                          computation_on_concepts=classifier)

    concepts, batch_explanations, concept_outputs = factorizer(
        img_tensor[None, :], n_components)

    concept_outputs = torch.softmax(
        torch.from_numpy(concept_outputs),
        axis=-1).numpy()
    concept_label_strings = create_labels_legend(concept_outputs,
                                                 labels=model.config.id2label,
                                                 top_k=top_k)
    visualization = show_factorization_on_image(
        rgb_img_float,
        batch_explanations[0],
        image_weight=0.3,
        concept_labels=concept_label_strings)

    result = np.hstack((np.array(img_pil), visualization))
    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/feature_factorization/utils.py
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
import requests
|
2 |
-
import numpy as np
|
3 |
-
from typing import Dict
|
4 |
-
|
5 |
-
|
6 |
-
def create_labels_legend(concept_scores: np.ndarray,
                         labels: Dict[int, str],
                         top_k=2):
    """Build one multi-line legend string per concept.

    For every row of ``concept_scores`` the ``top_k`` highest-scoring
    categories are listed, best first, one per line, formatted as
    ``<label>:<score with two decimals>``. Only the part of a label before
    its first comma is used.

    :param concept_scores: 2D array, one row per concept and one column
        per category.
    :param labels: Mapping from category index to label text.
    :param top_k: Number of categories to list per concept.
    :returns: A list of strings, one per concept.
    """
    # Ascending argsort, reversed, then truncated: best categories first.
    ranked = np.argsort(concept_scores, axis=1)[:, ::-1][:, :top_k]
    legends = []
    for row_scores, row_categories in zip(concept_scores, ranked):
        entries = [
            f"{labels[category].split(',')[0]}:{row_scores[category]:.2f}"
            for category in row_categories
        ]
        legends.append("\n".join(entries))
    return legends
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/fullgrad_cam.py
DELETED
@@ -1,95 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import torch
|
3 |
-
from custom_grad_cam.base_cam import BaseCAM
|
4 |
-
from custom_grad_cam.utils.find_layers import find_layer_predicate_recursive
|
5 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
6 |
-
from custom_grad_cam.utils.image import scale_accross_batch_and_channels, scale_cam_image
|
7 |
-
|
8 |
-
# https://arxiv.org/abs/1905.00780
|
9 |
-
|
10 |
-
|
11 |
-
class FullGrad(BaseCAM):
    """FullGrad saliency (https://arxiv.org/abs/1905.00780).

    The ``target_layers`` argument is ignored: every ``Conv2d`` and
    ``BatchNorm2d`` layer that has a bias is used instead.
    """

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        if len(target_layers) > 0:
            print(
                "Warning: target_layers is ignored in FullGrad. All bias layers will be used instead")

        def layer_with_2D_bias(layer):
            # Exact type match on purpose (no isinstance): only these two
            # layer types are selected.
            supported = [torch.nn.Conv2d, torch.nn.BatchNorm2d]
            return type(layer) in supported and layer.bias is not None

        target_layers = find_layer_predicate_recursive(
            model, layer_with_2D_bias)
        super().__init__(
            model,
            target_layers,
            use_cuda,
            reshape_transform,
            compute_input_gradient=True,
        )
        self.bias_data = [
            self.get_bias_data(layer).cpu().numpy()
            for layer in target_layers
        ]

    def get_bias_data(self, layer):
        """Return the bias tensor data used for ``layer``.

        For ``BatchNorm2d`` the running mean and variance are folded into
        the bias; for any other layer ``layer.bias.data`` is returned.
        """
        # Borrowed from official paper impl:
        # https://github.com/idiap/fullgrad-saliency/blob/master/saliency/tensor_extractor.py#L47
        if not isinstance(layer, torch.nn.BatchNorm2d):
            return layer.bias.data
        folded_mean = (layer.running_mean * layer.weight
                       / torch.sqrt(layer.running_var + layer.eps))
        bias = - folded_mean + layer.bias
        return bias.data

    def compute_cam_per_layer(
            self,
            input_tensor,
            target_category,
            eigen_smooth):
        """Build the FullGrad map from the input and bias gradients.

        The first component is ``|input gradient * input|``; one more
        component per bias layer is ``|bias * gradient|`` summed over
        axis 1. All components are rescaled to the input size.
        """
        target_size = self.get_target_width_height(input_tensor)
        input_grad = input_tensor.grad.data.cpu().numpy()
        grads_list = [g.cpu().data.numpy() for g in
                      self.activations_and_grads.gradients]

        input_term = np.abs(input_grad * input_tensor.data.cpu().numpy())
        input_term = scale_accross_batch_and_channels(
            input_term,
            target_size)
        cam_per_target_layer = [input_term]

        # Loop over the saliency image from every layer
        assert(len(self.bias_data) == len(grads_list))
        for layer_bias, layer_grads in zip(self.bias_data, grads_list):
            broadcast_bias = layer_bias[None, :, None, None]
            # In the paper they take the absolute value,
            # but possibly taking only the positive gradients will work
            # better.
            bias_grad = np.abs(broadcast_bias * layer_grads)
            scaled = scale_accross_batch_and_channels(
                bias_grad, target_size)
            summed = np.sum(scaled, axis=1)
            cam_per_target_layer.append(summed[:, None, :])

        cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
        if not eigen_smooth:
            return np.sum(cam_per_target_layer, axis=1)[:, None, :]

        # Resize to a smaller image, since this method typically has a very large number of channels,
        # and then consumes a lot of memory
        reduced_size = (target_size[0] // 8, target_size[1] // 8)
        cam_per_target_layer = scale_accross_batch_and_channels(
            cam_per_target_layer, reduced_size)
        cam_per_target_layer = get_2d_projection(cam_per_target_layer)
        cam_per_target_layer = cam_per_target_layer[:, None, :, :]
        cam_per_target_layer = scale_accross_batch_and_channels(
            cam_per_target_layer,
            target_size)
        return cam_per_target_layer

    def aggregate_multi_layers(self, cam_per_target_layer):
        """Sum over axis 1 and rescale with ``scale_cam_image``."""
        summed = np.sum(cam_per_target_layer, axis=1)
        return scale_cam_image(summed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/grad_cam.py
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from custom_grad_cam.base_cam import BaseCAM
|
3 |
-
|
4 |
-
|
5 |
-
class GradCAM(BaseCAM):
    """Grad-CAM: each channel is weighted by the mean of its gradients."""

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(model, target_layers, use_cuda, reshape_transform)

    def get_cam_weights(self,
                        input_tensor,
                        target_layer,
                        target_category,
                        activations,
                        grads):
        """Return the gradients averaged over axes 2 and 3."""
        spatial_axes = (2, 3)
        return np.mean(grads, axis=spatial_axes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/grad_cam_elementwise.py
DELETED
@@ -1,30 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from custom_grad_cam.base_cam import BaseCAM
|
3 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
4 |
-
|
5 |
-
|
6 |
-
class GradCAMElementWise(BaseCAM):
    """CAM from the element-wise product of gradients and activations,
    clipped at zero before the channels are combined."""

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(model, target_layers, use_cuda, reshape_transform)

    def get_cam_image(self,
                      input_tensor,
                      target_layer,
                      target_category,
                      activations,
                      grads,
                      eigen_smooth):
        """Combine ``max(grads * activations, 0)`` over the channel axis.

        With ``eigen_smooth`` the combination is ``get_2d_projection``;
        otherwise it is a sum over axis 1.
        """
        positive_part = np.maximum(grads * activations, 0)
        if eigen_smooth:
            return get_2d_projection(positive_part)
        return positive_part.sum(axis=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/grad_cam_plusplus.py
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from custom_grad_cam.base_cam import BaseCAM
|
3 |
-
|
4 |
-
# https://arxiv.org/abs/1710.11063
|
5 |
-
|
6 |
-
|
7 |
-
class GradCAMPlusPlus(BaseCAM):
    """Grad-CAM++ channel weights (https://arxiv.org/abs/1710.11063)."""

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(model, target_layers, use_cuda, reshape_transform)

    def get_cam_weights(self,
                        input_tensor,
                        target_layers,
                        target_category,
                        activations,
                        grads):
        """Return one weight per (batch, channel) pair.

        Only ``activations`` and ``grads`` are used; both are reduced over
        axes 2 and 3.
        """
        squared = grads**2
        cubed = squared * grads
        # Equation 19 in https://arxiv.org/abs/1710.11063
        activation_totals = np.sum(activations, axis=(2, 3))
        eps = 0.000001
        denominator = (2 * squared +
                       activation_totals[:, :, None, None] * cubed + eps)
        aij = squared / denominator
        # Now bring back the ReLU from eq.7 in the paper;
        # the mask applied here zeroes aij wherever the gradient is 0.
        aij = np.where(grads != 0, aij, 0)

        positive_grads = np.maximum(grads, 0)
        return np.sum(positive_grads * aij, axis=(2, 3))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/guided_backprop.py
DELETED
@@ -1,100 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import torch
|
3 |
-
from torch.autograd import Function
|
4 |
-
from custom_grad_cam.utils.find_layers import replace_all_layer_type_recursive
|
5 |
-
|
6 |
-
|
7 |
-
class GuidedBackpropReLU(Function):
    """ReLU whose backward pass lets a gradient through only where both
    the forward input and the incoming gradient are positive."""

    @staticmethod
    def forward(self, input_img):
        """Zero out the non-positive entries of ``input_img``."""
        keep = (input_img > 0).type_as(input_img)
        zeros = input_img.new_zeros(input_img.size())
        output = torch.addcmul(zeros, input_img, keep)
        self.save_for_backward(input_img, output)
        return output

    @staticmethod
    def backward(self, grad_output):
        """Mask ``grad_output`` by positive input and positive gradient."""
        input_img, output = self.saved_tensors

        input_positive = (input_img > 0).type_as(grad_output)
        grad_positive = (grad_output > 0).type_as(grad_output)
        # First mask by the forward input, then by the gradient sign.
        masked_by_input = torch.addcmul(
            input_img.new_zeros(input_img.size()),
            grad_output,
            input_positive)
        grad_input = torch.addcmul(
            input_img.new_zeros(input_img.size()),
            masked_by_input,
            grad_positive)
        return grad_input
|
36 |
-
|
37 |
-
|
38 |
-
class GuidedBackpropReLUasModule(torch.nn.Module):
    """``torch.nn.Module`` wrapper around ``GuidedBackpropReLU.apply``."""

    def __init__(self):
        super().__init__()

    def forward(self, input_img):
        """Apply the guided-backprop ReLU to ``input_img``."""
        activated = GuidedBackpropReLU.apply(input_img)
        return activated
|
44 |
-
|
45 |
-
|
46 |
-
class GuidedBackpropReLUModel:
    """Compute guided-backpropagation input gradients for a model.

    During ``__call__`` every ``torch.nn.ReLU`` in the model is temporarily
    replaced by ``GuidedBackpropReLUasModule`` and restored afterwards.
    """

    def __init__(self, model, use_cuda):
        """Put ``model`` in eval mode and optionally move it to CUDA."""
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = self.model.cuda()

    def forward(self, input_img):
        """Run the wrapped model on ``input_img``."""
        return self.model(input_img)

    def recursive_replace_relu_with_guidedrelu(self, module_top):
        """Replace every child whose class is named 'ReLU', recursively,
        with ``GuidedBackpropReLU.apply``."""
        for idx, module in module_top._modules.items():
            self.recursive_replace_relu_with_guidedrelu(module)
            if module.__class__.__name__ == 'ReLU':
                module_top._modules[idx] = GuidedBackpropReLU.apply

    def recursive_replace_guidedrelu_with_relu(self, module_top):
        """Undo ``recursive_replace_relu_with_guidedrelu`` (best effort)."""
        try:
            for idx, module in module_top._modules.items():
                self.recursive_replace_guidedrelu_with_relu(module)
                if module == GuidedBackpropReLU.apply:
                    module_top._modules[idx] = torch.nn.ReLU()
        except AttributeError:
            # Entries without a ``_modules`` attribute (for example the
            # plain function installed by the forward replacement) have
            # nothing to recurse into. Only this case is ignored, so
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            pass

    def __call__(self, input_img, target_category=None):
        """Return the guided-backprop gradient of the first batch element.

        :param input_img: Input tensor; it is made to require grad.
        :param target_category: Output index to back-propagate from. When
            None, the argmax of the (flattened) model output is used.
        :returns: The input gradient of batch element 0 as a numpy array
            with its axes transposed by ``(1, 2, 0)``.
        """
        replace_all_layer_type_recursive(self.model,
                                         torch.nn.ReLU,
                                         GuidedBackpropReLUasModule())
        try:
            if self.cuda:
                input_img = input_img.cuda()

            input_img = input_img.requires_grad_(True)

            output = self.forward(input_img)

            if target_category is None:
                target_category = np.argmax(output.cpu().data.numpy())

            loss = output[0, target_category]
            loss.backward(retain_graph=True)

            output = input_img.grad.cpu().data.numpy()
            output = output[0, :, :, :]
            output = output.transpose((1, 2, 0))
        finally:
            # Always restore the original ReLUs, even when the forward or
            # backward pass raises, so the model is never left modified.
            replace_all_layer_type_recursive(self.model,
                                             GuidedBackpropReLUasModule,
                                             torch.nn.ReLU())

        return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/hirescam.py
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from custom_grad_cam.base_cam import BaseCAM
|
3 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
4 |
-
|
5 |
-
|
6 |
-
class HiResCAM(BaseCAM):
    """CAM variant that multiplies gradients and activations element-wise."""

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(model, target_layers, use_cuda, reshape_transform)

    def get_cam_image(self,
                      input_tensor,
                      target_layer,
                      target_category,
                      activations,
                      grads,
                      eigen_smooth):
        """Return the CAM built from `grads * activations`.

        Without smoothing the product is summed over axis 1. With
        `eigen_smooth` a warning is printed and the product is reduced with
        `get_2d_projection` instead.
        """
        weighted = grads * activations

        if not eigen_smooth:
            return weighted.sum(axis=1)

        print(
            "Warning: HiResCAM's faithfulness guarantees do not hold if smoothing is applied")
        return get_2d_projection(weighted)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/layer_cam.py
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from custom_grad_cam.base_cam import BaseCAM
|
3 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
4 |
-
|
5 |
-
# https://ieeexplore.ieee.org/document/9462463
|
6 |
-
|
7 |
-
|
8 |
-
class LayerCAM(BaseCAM):
    """CAM variant that weights activations by their positive gradients."""

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(model, target_layers, use_cuda, reshape_transform)

    def get_cam_image(self,
                      input_tensor,
                      target_layer,
                      target_category,
                      activations,
                      grads,
                      eigen_smooth):
        """Return the CAM built from `max(grads, 0) * activations`.

        The product is summed over axis 1, or reduced with
        `get_2d_projection` when `eigen_smooth` is true.
        """
        positive_grads = np.maximum(grads, 0)
        weighted = positive_grads * activations

        if eigen_smooth:
            return get_2d_projection(weighted)
        return weighted.sum(axis=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/metrics/__init__.py
DELETED
File without changes
|
src/custom_code/custom_grad_cam/metrics/cam_mult_image.py
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import numpy as np
|
3 |
-
from typing import List, Callable
|
4 |
-
from custom_grad_cam.metrics.perturbation_confidence import PerturbationConfidenceMetric
|
5 |
-
|
6 |
-
|
7 |
-
def multiply_tensor_with_cam(input_tensor: torch.Tensor,
                             cam: torch.Tensor):
    """Return the element-wise product of an input tensor and an attribution map.

    The input tensor is expected to be already normalized.
    """
    weighted_input = input_tensor * cam
    return weighted_input
|
13 |
-
|
14 |
-
|
15 |
-
class CamMultImageConfidenceChange(PerturbationConfidenceMetric):
    """Perturbation metric whose perturbation is `multiply_tensor_with_cam`."""

    def __init__(self):
        super().__init__(multiply_tensor_with_cam)
|
19 |
-
|
20 |
-
|
21 |
-
class DropInConfidence(CamMultImageConfidenceChange):
    """Report how much the score dropped, clamped at zero."""

    def __init__(self):
        super().__init__()

    def __call__(self, *args, **kwargs):
        """Return `max(-change, 0)` of the parent metric's result."""
        confidence_change = super().__call__(*args, **kwargs)
        drop = -confidence_change
        return np.maximum(drop, 0)
|
29 |
-
|
30 |
-
|
31 |
-
class IncreaseInConfidence(CamMultImageConfidenceChange):
    """Report, as 0.0/1.0, whether the score increased."""

    def __init__(self):
        super().__init__()

    def __call__(self, *args, **kwargs):
        """Return a float32 indicator of where the parent metric's result is positive."""
        confidence_change = super().__call__(*args, **kwargs)
        increased = confidence_change > 0
        return np.float32(increased)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/metrics/perturbation_confidence.py
DELETED
@@ -1,109 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import numpy as np
|
3 |
-
from typing import List, Callable
|
4 |
-
|
5 |
-
import numpy as np
|
6 |
-
import cv2
|
7 |
-
|
8 |
-
|
9 |
-
class PerturbationConfidenceMetric:
    """Score how a model's target outputs react to a CAM-driven perturbation."""

    def __init__(self, perturbation):
        # Callable taking (single input tensor on CPU, CAM as a tensor) and
        # returning the perturbed tensor.
        self.perturbation = perturbation

    def __call__(self, input_tensor: torch.Tensor,
                 cams: np.ndarray,
                 targets: List[Callable],
                 model: torch.nn.Module,
                 return_visualization=False,
                 return_diff=True):
        """Perturb every sample with its CAM and score the model on the result.

        :param input_tensor: Batch of inputs; its first dimension is the batch size.
        :param cams: One attribution map per sample, indexed by sample.
        :param targets: One callable per sample mapping a model output to a score.
        :param model: Model evaluated under `torch.no_grad()`.
        :param return_visualization: Also return the perturbed batch.
        :param return_diff: Return (perturbed score - original score) instead
            of the perturbed score alone.
        :returns: float32 score array, optionally paired with the perturbed tensors.
        """
        if return_diff:
            # Baseline scores on the unperturbed batch.
            with torch.no_grad():
                baseline_outputs = model(input_tensor)
                baseline_scores = np.float32(
                    [target(output).cpu().numpy()
                     for target, output in zip(targets, baseline_outputs)])

        perturbed_samples = []
        for index in range(input_tensor.size(0)):
            perturbed = self.perturbation(input_tensor[index, ...].cpu(),
                                          torch.from_numpy(cams[index]))
            perturbed = perturbed.to(input_tensor.device)
            perturbed_samples.append(perturbed.unsqueeze(0))
        perturbated_tensors = torch.cat(perturbed_samples)

        with torch.no_grad():
            perturbed_outputs = model(perturbated_tensors)
        perturbed_scores = np.float32(
            [target(output).cpu().numpy()
             for target, output in zip(targets, perturbed_outputs)])

        result = perturbed_scores - baseline_scores if return_diff else perturbed_scores

        if return_visualization:
            return result, perturbated_tensors
        return result
|
53 |
-
|
54 |
-
|
55 |
-
class RemoveMostRelevantFirst:
    """Binarize an attribution mask and hand it to an imputer."""

    def __init__(self, percentile, imputer):
        # Either a numeric percentile or the string 'auto' (Otsu threshold).
        self.percentile = percentile
        # Callable taking (input_tensor, binary_mask).
        self.imputer = imputer

    def __call__(self, input_tensor, mask):
        """Threshold `mask` and return `imputer(input_tensor, binary_mask)`.

        With a numeric percentile the binary mask is 1.0 where `mask` lies
        strictly below that percentile of its values and 0.0 elsewhere. With
        'auto' the mask is scaled to 0..255 and thresholded by OpenCV's Otsu
        method.
        """
        if self.percentile == 'auto':
            _, binary_mask = cv2.threshold(
                np.uint8(mask * 255), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        else:
            threshold = np.percentile(mask.cpu().numpy(), self.percentile)
            binary_mask = np.float32(mask < threshold)

        binary_mask = torch.from_numpy(binary_mask).to(mask.device)
        return self.imputer(input_tensor, binary_mask)
|
72 |
-
|
73 |
-
|
74 |
-
class RemoveLeastRelevantFirst(RemoveMostRelevantFirst):
    """Same as `RemoveMostRelevantFirst`, applied to the inverted mask."""

    def __init__(self, percentile, imputer):
        super().__init__(percentile, imputer)

    def __call__(self, input_tensor, mask):
        """Run the parent thresholding on `1 - mask`."""
        inverted_mask = 1 - mask
        return super().__call__(input_tensor, inverted_mask)
|
81 |
-
|
82 |
-
|
83 |
-
class AveragerAcrossThresholds:
    """Average a percentile-parameterised metric over several percentiles."""

    def __init__(self, imputer, percentiles=None):
        """Store the metric factory and the percentiles to sweep.

        :param imputer: Callable taking a percentile and returning a metric
            that is itself called as `metric(input_tensor, cams, targets, model)`.
        :param percentiles: Percentiles to evaluate. Defaults to
            10, 20, ..., 90. A fresh list is created per instance so that
            mutating one instance's `percentiles` cannot leak into others.
        """
        self.imputer = imputer
        if percentiles is None:
            percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        self.percentiles = percentiles

    def __call__(self,
                 input_tensor: torch.Tensor,
                 cams: np.ndarray,
                 targets: List[Callable],
                 model: torch.nn.Module):
        """Return the mean over percentiles (axis 0) of the per-percentile scores."""
        scores = [
            self.imputer(percentile)(input_tensor, cams, targets, model)
            for percentile in self.percentiles
        ]
        return np.mean(np.float32(scores), axis=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/metrics/road.py
DELETED
@@ -1,181 +0,0 @@
|
|
1 |
-
# A Consistent and Efficient Evaluation Strategy for Attribution Methods
|
2 |
-
# https://arxiv.org/abs/2202.00449
|
3 |
-
# Taken from https://raw.githubusercontent.com/tleemann/road_evaluation/main/imputations.py
|
4 |
-
# MIT License
|
5 |
-
|
6 |
-
# Copyright (c) 2022 Tobias Leemann
|
7 |
-
|
8 |
-
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
9 |
-
# of this software and associated documentation files (the "Software"), to deal
|
10 |
-
# in the Software without restriction, including without limitation the rights
|
11 |
-
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12 |
-
# copies of the Software, and to permit persons to whom the Software is
|
13 |
-
# furnished to do so, subject to the following conditions:
|
14 |
-
|
15 |
-
# The above copyright notice and this permission notice shall be included in all
|
16 |
-
# copies or substantial portions of the Software.
|
17 |
-
|
18 |
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19 |
-
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20 |
-
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21 |
-
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22 |
-
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23 |
-
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
24 |
-
# SOFTWARE.
|
25 |
-
|
26 |
-
|
27 |
-
# Implementations of our imputation models.
|
28 |
-
import torch
|
29 |
-
import numpy as np
|
30 |
-
from scipy.sparse import lil_matrix, csc_matrix
|
31 |
-
from scipy.sparse.linalg import spsolve
|
32 |
-
from typing import List, Callable
|
33 |
-
from custom_grad_cam.metrics.perturbation_confidence import PerturbationConfidenceMetric, \
|
34 |
-
AveragerAcrossThresholds, \
|
35 |
-
RemoveMostRelevantFirst, \
|
36 |
-
RemoveLeastRelevantFirst
|
37 |
-
|
38 |
-
# The weights of the surrounding pixels
|
39 |
-
# Default 8-neighbourhood stencil as ((row_offset, col_offset), weight).
# Edge-adjacent neighbours weigh 1/6 and diagonal neighbours 1/12, so the
# weights sum to 1.
neighbors_weights = [((1, 1), 1 / 12),
                     ((0, 1), 1 / 6),
                     ((-1, 1), 1 / 12),
                     ((1, -1), 1 / 12),
                     ((0, -1), 1 / 6),
                     ((-1, -1), 1 / 12),
                     ((1, 0), 1 / 6),
                     ((-1, 0), 1 / 6)]


class NoisyLinearImputer:
    """Fill masked-out pixels by solving a sparse linear neighbour-averaging system."""

    def __init__(self,
                 noise: float = 0.01,
                 weighting: List[float] = neighbors_weights):
        """
        Noisy linear imputation.
        noise: magnitude of noise to add (absolute, set to 0 for no noise)
        weighting: Weights of the neighboring pixels in the computation.
        List of tuples of (offset, weight). The diagonal of the system starts
        from a total weight of 1, so custom weights are expected to sum to 1.
        """
        self.noise = noise
        # Honour the caller's stencil; previously the module-level default
        # was stored regardless of what was passed in.
        self.weighting = weighting

    @staticmethod
    def add_offset_to_indices(indices, offset, mask_shape):
        """ Add the corresponding offset to the indices.
        Return a validity bit-vector (False where the shifted position falls
        outside the mask) plus the shifted flat indices. """
        cord1 = indices % mask_shape[1]
        cord0 = indices // mask_shape[1]
        cord0 += offset[0]
        cord1 += offset[1]
        out_of_bounds = ((cord0 < 0) | (cord1 < 0) |
                         (cord0 >= mask_shape[0]) |
                         (cord1 >= mask_shape[1]))
        return ~out_of_bounds, indices + offset[0] * mask_shape[1] + offset[1]

    @staticmethod
    def setup_sparse_system(mask, img, neighbors_weights):
        """ Vectorized version to set up the equation system.
        mask: (H, W)-array of missing pixels (0 marks a pixel to impute).
        img: array of all values, flattened here to (C, H*W).
        neighbors_weights: list of ((row_offset, col_offset), weight).
        Return (N,N)-System matrix, (N,C)-Right hand side for each of the C channels.
        """
        maskflt = mask.flatten()
        imgflat = img.reshape((img.shape[0], -1))
        # Indices that are imputed in the flattened mask:
        indices = np.argwhere(maskflt == 0).flatten()
        # Map a flat pixel index to its row/column in the equation system.
        coords_to_vidx = np.zeros(len(maskflt), dtype=int)
        coords_to_vidx[indices] = np.arange(len(indices))
        numEquations = len(indices)
        # System matrix:
        A = lil_matrix((numEquations, numEquations))
        b = np.zeros((numEquations, img.shape[0]))
        # Sum of weights assigned:
        sum_neighbors = np.ones(numEquations)
        for offset, weight in neighbors_weights:
            # Drop neighbours that fall outside the image.
            valid, new_coords = NoisyLinearImputer.add_offset_to_indices(
                indices, offset, mask.shape)
            valid_coords = new_coords[valid]
            valid_ids = np.flatnonzero(valid)
            # Known neighbours contribute to the right hand-side.
            known = maskflt[valid_coords] > 0.5
            has_values_coords = valid_coords[known]
            has_values_ids = valid_ids[known]
            b[has_values_ids, :] -= weight * imgflat[:, has_values_coords].T
            # Unknown neighbours become off-diagonal entries (left hand side).
            unknown = maskflt[valid_coords] < 0.5
            variable_ids = coords_to_vidx[valid_coords[unknown]]
            has_no_values_ids = valid_ids[unknown]
            if has_no_values_ids.size:
                A[has_no_values_ids, variable_ids] = weight
            # Reduce the total weight for neighbours outside the image.
            sum_neighbors[~valid] -= weight

        A[np.arange(numEquations), np.arange(numEquations)] = -sum_neighbors
        return A, b

    def __call__(self, img: torch.Tensor, mask: torch.Tensor):
        """ Linear infilling of the pixels where `mask` is 0.

        img: original image (C,H,W)-tensor;
        mask: mask; (H,W)-tensor
        Returns a tensor shaped like `img` in which the masked pixels hold
        the solution of the linear system plus `noise`-scaled Gaussian noise.
        """
        imgflt = img.reshape(img.shape[0], -1)
        # Indices that need to be imputed.
        indices_linear = np.flatnonzero(mask.numpy().reshape(-1) == 0)
        img_infill = imgflt.clone()
        if indices_linear.size == 0:
            # Nothing is masked out, so there is no system to solve.
            return img_infill.reshape_as(img)
        # Set up sparse equation system, solve system.
        A, b = NoisyLinearImputer.setup_sparse_system(
            mask.numpy(), img.numpy(), self.weighting)
        res = torch.tensor(spsolve(csc_matrix(A), b), dtype=torch.float)

        # Fill the values with the solution of the system.
        img_infill[:, torch.from_numpy(indices_linear)] = res.t() + self.noise * \
            torch.randn_like(res.t())

        return img_infill.reshape_as(img)
|
141 |
-
|
142 |
-
|
143 |
-
class ROADMostRelevantFirst(PerturbationConfidenceMetric):
    """Perturbation metric using `RemoveMostRelevantFirst` with a `NoisyLinearImputer`."""

    def __init__(self, percentile=80):
        perturbation = RemoveMostRelevantFirst(percentile, NoisyLinearImputer())
        super().__init__(perturbation)
|
147 |
-
|
148 |
-
|
149 |
-
class ROADLeastRelevantFirst(PerturbationConfidenceMetric):
    """Perturbation metric using `RemoveLeastRelevantFirst` with a `NoisyLinearImputer`."""

    def __init__(self, percentile=20):
        perturbation = RemoveLeastRelevantFirst(percentile, NoisyLinearImputer())
        super().__init__(perturbation)
|
153 |
-
|
154 |
-
|
155 |
-
class ROADMostRelevantFirstAverage(AveragerAcrossThresholds):
    """Average `ROADMostRelevantFirst` over a sweep of percentiles."""

    def __init__(self, percentiles=None):
        """
        :param percentiles: Percentiles to sweep; defaults to 10, 20, ..., 90.
            The default list is built per instance rather than shared as a
            mutable default argument.
        """
        if percentiles is None:
            percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        super().__init__(ROADMostRelevantFirst, percentiles)
|
159 |
-
|
160 |
-
|
161 |
-
class ROADLeastRelevantFirstAverage(AveragerAcrossThresholds):
    """Average `ROADLeastRelevantFirst` over a sweep of percentiles."""

    def __init__(self, percentiles=None):
        """
        :param percentiles: Percentiles to sweep; defaults to 10, 20, ..., 90.
            The default list is built per instance rather than shared as a
            mutable default argument.
        """
        if percentiles is None:
            percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        super().__init__(ROADLeastRelevantFirst, percentiles)
|
165 |
-
|
166 |
-
|
167 |
-
class ROADCombined:
    """Combine the least- and most-relevant-first averaged ROAD scores."""

    def __init__(self, percentiles=None):
        """
        :param percentiles: Percentiles passed to both averagers; defaults to
            10, 20, ..., 90. The default list is built per instance rather
            than shared as a mutable default argument.
        """
        if percentiles is None:
            percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        self.percentiles = percentiles
        self.morf_averager = ROADMostRelevantFirstAverage(percentiles)
        self.lerf_averager = ROADLeastRelevantFirstAverage(percentiles)

    def __call__(self,
                 input_tensor: torch.Tensor,
                 cams: np.ndarray,
                 targets: List[Callable],
                 model: torch.nn.Module):
        """Return `(LeRF score - MoRF score) / 2` for the given batch."""
        scores_lerf = self.lerf_averager(input_tensor, cams, targets, model)
        scores_morf = self.morf_averager(input_tensor, cams, targets, model)
        return (scores_lerf - scores_morf) / 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/random_cam.py
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from custom_grad_cam.base_cam import BaseCAM
|
3 |
-
|
4 |
-
|
5 |
-
class RandomCAM(BaseCAM):
    """CAM whose channel weights are drawn uniformly at random."""

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(model, target_layers, use_cuda, reshape_transform)

    def get_cam_weights(self,
                        input_tensor,
                        target_layer,
                        target_category,
                        activations,
                        grads):
        """Return uniform random weights in [-1, 1) shaped like the first two axes of `grads`."""
        rows, cols = grads.shape[0], grads.shape[1]
        return np.random.uniform(-1, 1, size=(rows, cols))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/score_cam.py
DELETED
@@ -1,60 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import tqdm
|
3 |
-
from custom_grad_cam.base_cam import BaseCAM
|
4 |
-
|
5 |
-
|
6 |
-
class ScoreCAM(BaseCAM):
    """Gradient-free CAM: weight each activation channel by the score the
    model gives to the input masked with that (upsampled) channel."""

    def __init__(
            self,
            model,
            target_layers,
            use_cuda=False,
            reshape_transform=None):
        super(ScoreCAM, self).__init__(model,
                                       target_layers,
                                       use_cuda,
                                       reshape_transform=reshape_transform,
                                       uses_gradients=False)

    def get_cam_weights(self,
                        input_tensor,
                        target_layer,
                        targets,
                        activations,
                        grads):
        """Return softmax-normalised per-channel weights as a NumPy array.

        :param input_tensor: Model input; its last two dimensions set the
            size the activations are upsampled to.
        :param targets: One callable per sample mapping a model output to a score.
        :param activations: NumPy activations; the result is reshaped to
            (activations.shape[0], activations.shape[1]).
        :param grads: Unused; present for interface compatibility.
        """
        with torch.no_grad():
            upsample = torch.nn.UpsamplingBilinear2d(
                size=input_tensor.shape[-2:])
            activation_tensor = torch.from_numpy(activations)
            if self.cuda:
                activation_tensor = activation_tensor.cuda()

            upsampled = upsample(activation_tensor)

            # Per-channel min-max normalisation to [0, 1].
            flat = upsampled.view(upsampled.size(0), upsampled.size(1), -1)
            maxs = flat.max(dim=-1)[0][:, :, None, None]
            mins = flat.min(dim=-1)[0][:, :, None, None]
            # The epsilon keeps constant channels (max == min) from producing
            # 0/0 = NaN, which would otherwise poison every weight.
            upsampled = (upsampled - mins) / (maxs - mins + 1e-8)

            # One masked copy of the input per activation channel.
            input_tensors = input_tensor[:, None,
                                         :, :] * upsampled[:, :, None, :, :]

            # `batch_size` is an optional attribute set by the caller.
            batch_size = getattr(self, "batch_size", 16)

            scores = []
            for target, tensor in zip(targets, input_tensors):
                for i in tqdm.tqdm(range(0, tensor.size(0), batch_size)):
                    batch = tensor[i: i + batch_size, :]
                    outputs = [target(o).cpu().item()
                               for o in self.model(batch)]
                    scores.extend(outputs)
            scores = torch.Tensor(scores)
            scores = scores.view(activations.shape[0], activations.shape[1])
            weights = torch.nn.Softmax(dim=-1)(scores).numpy()
            return weights
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/sobel_cam.py
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
import cv2
|
2 |
-
|
3 |
-
|
4 |
-
def sobel_cam(img):
    """Return an edge map: the equal-weight blend of absolute horizontal and
    vertical 3x3 Sobel responses of the grayscale version of an RGB image."""
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # (dx, dy) = (1, 0) is the horizontal derivative, (0, 1) the vertical one.
    abs_grad_x = cv2.convertScaleAbs(cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3))
    abs_grad_y = cv2.convertScaleAbs(cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3))
    return cv2.addWeighted(abs_grad_x, 0.5, abs_grad_y, 0.5, 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/utils/__init__.py
DELETED
@@ -1,4 +0,0 @@
|
|
1 |
-
from custom_grad_cam.utils.image import deprocess_image
|
2 |
-
from custom_grad_cam.utils.svd_on_activations import get_2d_projection
|
3 |
-
from custom_grad_cam.utils import model_targets
|
4 |
-
from custom_grad_cam.utils import reshape_transforms
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/utils/find_layers.py
DELETED
@@ -1,30 +0,0 @@
|
|
1 |
-
def replace_layer_recursive(model, old_layer, new_layer):
    """Replace the first occurrence of `old_layer` in the module tree.

    The tree is searched depth-first through `_modules`. Returns True as
    soon as one replacement has been made, False if `old_layer` was not found.
    """
    for child_name, child in model._modules.items():
        if child == old_layer:
            model._modules[child_name] = new_layer
            return True
        if replace_layer_recursive(child, old_layer, new_layer):
            return True
    return False
|
9 |
-
|
10 |
-
|
11 |
-
def replace_all_layer_type_recursive(model, old_layer_type, new_layer):
    """Replace every layer of type `old_layer_type` in the module tree.

    Note that the same `new_layer` object is stored at every matching
    position. The recursion descends into the original child, not into the
    replacement.
    """
    for child_name, child in model._modules.items():
        matches = isinstance(child, old_layer_type)
        if matches:
            model._modules[child_name] = new_layer
        replace_all_layer_type_recursive(child, old_layer_type, new_layer)
|
16 |
-
|
17 |
-
|
18 |
-
def find_layer_types_recursive(model, layer_types):
    """Return all layers whose exact type (no subclass matching) is in `layer_types`."""
    def has_listed_type(layer):
        return type(layer) in layer_types

    return find_layer_predicate_recursive(model, has_listed_type)
|
22 |
-
|
23 |
-
|
24 |
-
def find_layer_predicate_recursive(model, predicate):
    """Collect, in pre-order, every layer in the module tree for which
    `predicate(layer)` is truthy."""
    matches = []
    for child in model._modules.values():
        if predicate(child):
            matches.append(child)
        matches += find_layer_predicate_recursive(child, predicate)
    return matches
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/utils/image.py
DELETED
@@ -1,183 +0,0 @@
|
|
1 |
-
import matplotlib
|
2 |
-
from matplotlib import pyplot as plt
|
3 |
-
from matplotlib.lines import Line2D
|
4 |
-
import cv2
|
5 |
-
import numpy as np
|
6 |
-
import torch
|
7 |
-
from torchvision.transforms import Compose, Normalize, ToTensor
|
8 |
-
from typing import List, Dict
|
9 |
-
import math
|
10 |
-
|
11 |
-
|
12 |
-
def preprocess_image(
        img: np.ndarray,
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]) -> torch.Tensor:
    """Convert an image array into a normalized tensor with a leading batch axis.

    A copy of `img` goes through torchvision's `ToTensor` and then
    `Normalize(mean, std)`; the result gets a new dimension 0 of size 1.
    """
    to_normalized_tensor = Compose([
        ToTensor(),
        Normalize(mean=mean, std=std),
    ])
    tensor = to_normalized_tensor(img.copy())
    return tensor.unsqueeze(0)
|
21 |
-
|
22 |
-
|
23 |
-
def deprocess_image(img):
    """Turn an arbitrary-range array into a displayable uint8 image.

    The array is standardized (zero mean, unit std with a 1e-5 guard),
    squeezed to std 0.1 around 0.5, clipped to [0, 1] and scaled to 0..255.
    See https://github.com/jacobgil/keras-grad-cam/blob/master/grad-cam.py#L65
    """
    centered = img - np.mean(img)
    standardized = centered / (np.std(centered) + 1e-5)
    around_half = standardized * 0.1 + 0.5
    clipped = np.clip(around_half, 0, 1)
    return np.uint8(clipped * 255)
|
31 |
-
|
32 |
-
|
33 |
-
def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = False,
                      colormap: int = cv2.COLORMAP_JET,
                      image_weight: float = 0.5) -> np.ndarray:
    """ This function overlays the cam mask on the image as a heatmap.
    By default the heatmap is in BGR format.

    :param img: The base image in RGB or BGR format, as floats in [0, 1].
    :param mask: The cam mask.
    :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format.
    :param colormap: The OpenCV colormap to be used.
    :param image_weight: The final result is image_weight * img + (1-image_weight) * mask.
    :returns: The image with the cam overlay, as uint8.
    :raises ValueError: If `img` exceeds 1 or `image_weight` is outside [0, 1].
    """
    # Validate first so bad input fails before any colour-map work is done.
    # ValueError is a subclass of Exception, so existing handlers still match.
    if np.max(img) > 1:
        raise ValueError(
            "The input image should be np.float32 in the range [0, 1]")

    if image_weight < 0 or image_weight > 1:
        raise ValueError(
            f"image_weight should be in the range [0, 1]. Got: {image_weight}")

    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap = np.float32(heatmap) / 255

    cam = (1 - image_weight) * heatmap + image_weight * img
    peak = np.max(cam)
    if peak > 0:
        # Rescale so the brightest value becomes 1; an all-zero overlay is
        # left as is instead of dividing by zero.
        cam = cam / peak
    return np.uint8(255 * cam)
|
65 |
-
|
66 |
-
|
67 |
-
def create_labels_legend(concept_scores: np.ndarray,
                         labels: Dict[int, str],
                         top_k=2):
    """Build one legend string per concept from its `top_k` best categories.

    Each legend line is the first three comma-separated parts of the
    category label followed by ':' and the score with two decimals; the
    lines of one concept are joined with newlines, best category first.
    """
    top_categories = np.argsort(concept_scores, axis=1)[:, ::-1][:, :top_k]
    legend = []
    for concept_index, categories in enumerate(top_categories):
        lines = [
            f"{','.join(labels[category].split(',')[:3])}:{concept_scores[concept_index, category]:.2f}"
            for category in categories
        ]
        legend.append("\n".join(lines))
    return legend
|
81 |
-
|
82 |
-
|
83 |
-
def show_factorization_on_image(img: np.ndarray,
                                explanations: np.ndarray,
                                colors: List[np.ndarray] = None,
                                image_weight: float = 0.5,
                                concept_labels: List = None) -> np.ndarray:
    """ Color code the different component heatmaps on top of the image.
        Every component color code will be magnified according to the heatmap intensity
        (by modifying the V channel in the HSV color space),
        and optionally create a legend that shows the labels.

        Since different factorization component heatmaps can overlap in principle,
        we need a strategy to decide how to deal with the overlaps.
        This keeps the component that has a higher value in its heatmap.

    :param img: The base image RGB format.
    :param explanations: A tensor of shape num_components x height x width, with the component visualizations.
                         It is not modified.
    :param colors: List of R, G, B colors to be used for the components.
                   If None, will use the gist_rainbow cmap as a default.
    :param image_weight: The final result is image_weight * img + (1-image_weight) * visualization.
    :concept_labels: A list of strings for every component. If this is passed, a legend that shows
                     the labels and their colors will be added to the image.
    :returns: The visualized image.
    """
    n_components = explanations.shape[0]
    if colors is None:
        # taken from https://github.com/edocollins/DFF/blob/master/utils.py
        _cmap = plt.cm.get_cmap('gist_rainbow')
        colors = [
            np.array(_cmap(i))
            for i in np.arange(0, 1, 1.0 / n_components)]
    # Index of the strongest component at every pixel.
    concept_per_pixel = explanations.argmax(axis=0)
    masks = []
    for i in range(n_components):
        mask = np.zeros(shape=(img.shape[0], img.shape[1], 3))
        mask[:, :, :] = colors[i][:3]
        # Keep this component only where it wins; np.where builds a new
        # array so the caller's `explanations` is left untouched.
        explanation = np.where(concept_per_pixel == i, explanations[i], 0)
        mask = np.uint8(mask * 255)
        mask = cv2.cvtColor(mask, cv2.COLOR_RGB2HSV)
        # Brightness (V channel) follows the heatmap intensity.
        mask[:, :, 2] = np.uint8(255 * explanation)
        mask = cv2.cvtColor(mask, cv2.COLOR_HSV2RGB)
        mask = np.float32(mask) / 255
        masks.append(mask)

    mask = np.sum(np.float32(masks), axis=0)
    result = img * image_weight + mask * (1 - image_weight)
    result = np.uint8(result * 255)

    if concept_labels is not None:
        px = 1 / plt.rcParams['figure.dpi']  # pixel in inches
        fig = plt.figure(figsize=(result.shape[1] * px, result.shape[0] * px))
        # NOTE: this changes matplotlib's global legend font size.
        plt.rcParams['legend.fontsize'] = int(
            14 * result.shape[0] / 256 / max(1, n_components / 6))
        lw = 5 * result.shape[0] / 256
        lines = [Line2D([0], [0], color=colors[i], lw=lw)
                 for i in range(n_components)]
        plt.legend(lines,
                   concept_labels,
                   mode="expand",
                   fancybox=True,
                   shadow=True)

        plt.tight_layout(pad=0, w_pad=0, h_pad=0)
        plt.axis('off')
        # Render the legend figure and read it back as an RGB array.
        fig.canvas.draw()
        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        plt.close(fig=fig)
        data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
        data = cv2.resize(data, (result.shape[1], result.shape[0]))
        # Legend goes to the right of the visualization.
        result = np.hstack((result, data))
    return result
|
158 |
-
|
159 |
-
|
160 |
-
def scale_cam_image(cam, target_size=None):
    """Min-max scale every map in `cam` and optionally resize it.

    Each map is shifted so its minimum is 0 and divided by its maximum
    (plus 1e-7). When `target_size` is given each map is resized to it with
    `cv2.resize`. Returns the maps stacked as a float32 array.
    """
    scaled_maps = []
    for single_map in cam:
        shifted = single_map - np.min(single_map)
        normalized = shifted / (1e-7 + np.max(shifted))
        if target_size is not None:
            normalized = cv2.resize(normalized, target_size)
        scaled_maps.append(normalized)

    return np.float32(scaled_maps)
|
171 |
-
|
172 |
-
|
173 |
-
def scale_accross_batch_and_channels(tensor, target_size):
    """Apply `scale_cam_image` independently to every (batch, channel) map.

    The first two axes are merged, each map is scaled and resized to
    `target_size`, and the result is reshaped to
    (batch, channel, target_size[1], target_size[0]).
    """
    batch_size, channel_size = tensor.shape[:2]
    merged = tensor.reshape(batch_size * channel_size, *tensor.shape[2:])
    scaled = scale_cam_image(merged, target_size)
    return scaled.reshape(
        batch_size, channel_size, target_size[1], target_size[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/utils/model_targets.py
DELETED
@@ -1,103 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import torch
|
3 |
-
import torchvision
|
4 |
-
|
5 |
-
|
6 |
-
class ClassifierOutputTarget:
    """Callable that picks the raw score of one category from a model output."""

    def __init__(self, category):
        # Index of the score to extract.
        self.category = category

    def __call__(self, model_output):
        """Return the ``category`` entry of ``model_output``.

        A 1-D output is indexed directly; any other output is indexed on
        its second axis, keeping the first axis intact.
        """
        is_single_sample = len(model_output.shape) == 1
        if is_single_sample:
            return model_output[self.category]
        return model_output[:, self.category]
|
14 |
-
|
15 |
-
|
16 |
-
class ClassifierOutputSoftmaxTarget:
    """Callable that picks the softmax probability of one category."""

    def __init__(self, category):
        # Index of the probability to extract.
        self.category = category

    def __call__(self, model_output):
        """Softmax ``model_output`` over its last axis and select ``category``.

        A 1-D output yields a single value; any other output is indexed on
        its second axis, keeping the first axis intact.
        """
        probabilities = torch.softmax(model_output, dim=-1)
        if len(model_output.shape) == 1:
            return probabilities[self.category]
        return probabilities[:, self.category]
|
24 |
-
|
25 |
-
|
26 |
-
class BinaryClassifierOutputTarget:
    """Callable target for a binary classifier output."""

    def __init__(self, category):
        # Category 1 keeps a positive sign; every other value flips it.
        self.category = category

    def __call__(self, model_output):
        """Return ``|model_output|`` signed by the requested category.

        The magnitude is multiplied by ``+1`` when ``category == 1`` and by
        ``-1`` otherwise.
        """
        # NOTE(review): taking the absolute value discards the sign of the
        # model output itself -- confirm this is intended.
        sign = 1 if self.category == 1 else -1
        magnitude = torch.abs(model_output)
        return magnitude * sign
|
36 |
-
|
37 |
-
|
38 |
-
class SoftmaxOutputTarget:
    """Callable that turns a model output into softmax probabilities.

    The target is stateless and is constructed without arguments.
    """

    def __call__(self, model_output):
        """Return ``model_output`` softmaxed over its last axis."""
        probabilities = torch.softmax(model_output, dim=-1)
        return probabilities
|
44 |
-
|
45 |
-
|
46 |
-
class RawScoresOutputTarget:
    """Identity target: hands the model output back untouched.

    The target is stateless and is constructed without arguments.
    """

    def __call__(self, model_output):
        """Return ``model_output`` itself (the same object, not a copy)."""
        raw_scores = model_output
        return raw_scores
|
52 |
-
|
53 |
-
|
54 |
-
class SemanticSegmentationTarget:
    """Sum the scores of one category over the pixels selected by a mask.

    Takes a spatial mask and a category index, and returns the sum of that
    category's scores weighted by the mask.
    """

    def __init__(self, category, mask):
        """Store the category index and convert ``mask`` to a tensor.

        Args:
            category: Index of the category plane to read from the output.
            mask: NumPy array multiplied element-wise with that plane.
        """
        self.category = category
        # The mask stays where ``torch.from_numpy`` puts it; it is moved to
        # the right device per call, once the model output's device is known.
        self.mask = torch.from_numpy(mask)

    def __call__(self, model_output):
        """Return the masked sum of ``model_output[category, :, :]``.

        Args:
            model_output: Tensor indexed as ``[category, row, col]``.

        Returns:
            A scalar tensor on the same device as ``model_output``.
        """
        # Follow the output's device instead of assuming CUDA whenever it is
        # available: a CPU model on a GPU machine would otherwise fail with
        # a device mismatch.
        mask = self.mask.to(model_output.device)
        return (model_output[self.category, :, :] * mask).sum()
|
67 |
-
|
68 |
-
|
69 |
-
class FasterRCNNBoxScoreTarget:
    """Score how well current detections match a set of reference boxes.

    For every reference box in ``bounding_boxes`` the detection with the
    highest IoU is looked up. If that IoU exceeds ``iou_threshold`` and the
    detection carries the reference label, the box contributes
    ``IoU + classification score``; otherwise it contributes 0.

    The total score is the sum of all box contributions.
    """

    def __init__(self, labels, bounding_boxes, iou_threshold=0.5):
        """Store the reference labels, boxes and the IoU threshold.

        Args:
            labels: Reference labels, paired one-to-one with the boxes.
            bounding_boxes: Reference boxes, one per label.
            iou_threshold: Minimum IoU (exclusive) for a match to count.
        """
        self.labels = labels
        self.bounding_boxes = bounding_boxes
        self.iou_threshold = iou_threshold

    def __call__(self, model_outputs):
        """Return the summed match score as a one-element tensor.

        Args:
            model_outputs: Mapping with ``"boxes"``, ``"labels"`` and
                ``"scores"`` entries describing the current detections.

        Returns:
            Tensor of shape ``(1,)`` holding the total score; it is zero
            when there are no detections or nothing matches.
        """
        detected_boxes = model_outputs["boxes"]

        # Work on the device the detections live on. Assuming CUDA whenever
        # it is available breaks CPU inference on a GPU machine. Non-tensor
        # containers keep the previous "CUDA if available" placement.
        if torch.is_tensor(detected_boxes):
            device = detected_boxes.device
        else:
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")

        output = torch.zeros(1, device=device)
        if len(detected_boxes) == 0:
            return output

        for box, label in zip(self.bounding_boxes, self.labels):
            # box_iou expects a (1, 4) float tensor for the reference box.
            box = torch.as_tensor(
                box, dtype=torch.float32, device=device).reshape(1, -1)

            ious = torchvision.ops.box_iou(box, detected_boxes)
            index = ious.argmax()
            matches_box = ious[0, index] > self.iou_threshold
            if matches_box and model_outputs["labels"][index] == label:
                score = ious[0, index] + model_outputs["scores"][index]
                output = output + score
        return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/utils/reshape_transforms.py
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
|
3 |
-
|
4 |
-
def fasterrcnn_reshape_transform(x):
    """Merge a mapping of feature maps into a single activation tensor.

    Every value of ``x`` is made non-negative with ``torch.abs``, resized
    bilinearly to the spatial size of ``x['pool']`` and the results are
    concatenated along dimension 1.

    Args:
        x: Mapping of feature tensors that contains a ``'pool'`` entry.

    Returns:
        The concatenated tensor.
    """
    pool_size = x['pool'].size()[-2:]
    resized = [
        torch.nn.functional.interpolate(
            torch.abs(feature), pool_size, mode='bilinear')
        for feature in x.values()
    ]
    return torch.cat(resized, dim=1)
|
15 |
-
|
16 |
-
|
17 |
-
def swinT_reshape_transform(tensor, height=7, width=7):
    """Rearrange a ``(batch, tokens, channels)`` tensor into image layout.

    The token axis is unfolded into a ``height x width`` grid and the
    channel axis is moved in front of it, like in CNNs.

    Args:
        tensor: Tensor whose axis 1 holds ``height * width`` tokens.
        height: Number of grid rows.
        width: Number of grid columns.

    Returns:
        Tensor of shape ``(batch, channels, height, width)``.
    """
    batch, channels = tensor.size(0), tensor.size(2)
    grid = tensor.reshape(batch, height, width, channels)

    # (batch, H, W, C) -> (batch, C, H, W)
    return grid.permute(0, 3, 1, 2)
|
25 |
-
|
26 |
-
|
27 |
-
def vit_reshape_transform(tensor, height=14, width=14):
    """Rearrange ViT-style tokens into image layout, dropping token 0.

    The first token along axis 1 is discarded, the remaining tokens are
    unfolded into a ``height x width`` grid and the channel axis is moved
    in front of it, like in CNNs.

    Args:
        tensor: Tensor whose axis 1 holds ``1 + height * width`` tokens.
        height: Number of grid rows.
        width: Number of grid columns.

    Returns:
        Tensor of shape ``(batch, channels, height, width)``.
    """
    batch, channels = tensor.size(0), tensor.size(2)
    patch_tokens = tensor[:, 1:, :]
    grid = patch_tokens.reshape(batch, height, width, channels)

    # (batch, H, W, C) -> (batch, C, H, W)
    return grid.permute(0, 3, 1, 2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/utils/svd_on_activations.py
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
|
3 |
-
|
4 |
-
def get_2d_projection(activation_batch):
    """Project each activation volume onto its first principal direction.

    For every element of the batch, the trailing axes are flattened into
    rows (one row per position, one column per leading-axis entry), the
    columns are centred, and the rows are projected onto the first right
    singular vector. The projection is reshaped back to the trailing axes.

    Args:
        activation_batch: Array of activation volumes, one per batch
            element. It is not modified.

    Returns:
        ``np.float32`` array holding one projection per batch element.
    """
    # TODO: use pytorch batch svd implementation
    # Clean NaNs on a private copy so the caller's array is left untouched.
    batch = np.array(activation_batch)
    batch[np.isnan(batch)] = 0

    projections = []
    for activations in batch:
        flat = activations.reshape(activations.shape[0], -1).transpose()
        # Centering before the SVD seems to be important here,
        # otherwise the image returned is negative.
        flat = flat - flat.mean(axis=0)
        # Only the first right singular vector is used, so the reduced SVD
        # suffices and avoids building the full square U matrix.
        _, _, vt = np.linalg.svd(flat, full_matrices=False)
        projection = (flat @ vt[0, :]).reshape(activations.shape[1:])
        projections.append(projection)
    return np.float32(projections)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/custom_code/custom_grad_cam/xgrad_cam.py
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from custom_grad_cam.base_cam import BaseCAM
|
3 |
-
|
4 |
-
|
5 |
-
class XGradCAM(BaseCAM):
    """CAM variant whose channel weights are activation-normalised gradients."""

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        """Forward all arguments unchanged to ``BaseCAM``."""
        super().__init__(model, target_layers, use_cuda, reshape_transform)

    def get_cam_weights(self,
                        input_tensor,
                        target_layer,
                        target_category,
                        activations,
                        grads):
        """Return one weight per (batch, channel) pair.

        Each gradient is scaled by its activation's share of the channel's
        total activation (summed over axes 2 and 3), and the scaled values
        are summed over those same axes. Only ``activations`` and ``grads``
        are used; the other arguments are ignored here.
        """
        eps = 1e-7  # guards against a zero activation sum
        channel_totals = np.sum(activations, axis=(2, 3))[:, :, None, None]
        scaled_grads = grads * activations / (channel_totals + eps)
        return scaled_grads.sum(axis=(2, 3))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/gradio_blocks.py
CHANGED
@@ -3,7 +3,7 @@ import os
|
|
3 |
|
4 |
VIDEOS_PER_ROW = 3
|
5 |
VIDEO_EXAMPLES_PATH = "src/example_videos"
|
6 |
-
def build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gradcam_video):
|
7 |
with gr.Row():
|
8 |
with gr.Column(scale=2):
|
9 |
gr.Markdown("### Video to GradCAM-Video")
|
@@ -36,13 +36,21 @@ def build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gra
|
|
36 |
scale=2,
|
37 |
)
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
with gr.Row():
|
48 |
colormap = gr.Dropdown(
|
@@ -70,11 +78,11 @@ def build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gra
|
|
70 |
|
71 |
with gr.Column(scale=1):
|
72 |
with gr.Column():
|
73 |
-
video_in = gr.Video(autoplay=False, include_audio=False)
|
74 |
-
video_out = gr.Video(autoplay=False, include_audio=False)
|
75 |
|
76 |
gif_cam_mode_button = gr.Button(value="Show GradCAM-Video", label="GradCAM", scale=1)
|
77 |
-
gif_cam_mode_button.click(fn=gradcam_video, inputs=[video_in, colormap, use_eigen_smooth, bw_highlight, video_alpha, video_cam_method, video_layer, video_animal_to_explain], outputs=[video_out], queue=True)
|
78 |
|
79 |
with gr.Row():
|
80 |
with gr.Column():
|
|
|
3 |
|
4 |
VIDEOS_PER_ROW = 3
|
5 |
VIDEO_EXAMPLES_PATH = "src/example_videos"
|
6 |
+
def build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gradcam_video, language):
|
7 |
with gr.Row():
|
8 |
with gr.Column(scale=2):
|
9 |
gr.Markdown("### Video to GradCAM-Video")
|
|
|
36 |
scale=2,
|
37 |
)
|
38 |
|
39 |
+
with gr.Row():
|
40 |
+
video_animal_to_explain = gr.Dropdown(
|
41 |
+
choices=["Predicted Class"] + ALL_CLASSES,
|
42 |
+
label="Animal",
|
43 |
+
value="Predicted Class",
|
44 |
+
interactive=True,
|
45 |
+
scale=4,
|
46 |
+
)
|
47 |
+
|
48 |
+
show_predicted_class = gr.Checkbox(
|
49 |
+
label="Show Predicted Class",
|
50 |
+
value=False,
|
51 |
+
interactive=True,
|
52 |
+
scale=1,
|
53 |
+
)
|
54 |
|
55 |
with gr.Row():
|
56 |
colormap = gr.Dropdown(
|
|
|
78 |
|
79 |
with gr.Column(scale=1):
|
80 |
with gr.Column():
|
81 |
+
video_in = gr.Video(autoplay=False, include_audio=False, label="Input Video")
|
82 |
+
video_out = gr.Video(autoplay=False, include_audio=False, show_label=False)
|
83 |
|
84 |
gif_cam_mode_button = gr.Button(value="Show GradCAM-Video", label="GradCAM", scale=1)
|
85 |
+
gif_cam_mode_button.click(fn=gradcam_video, inputs=[video_in, colormap, use_eigen_smooth, bw_highlight, video_alpha, video_cam_method, video_layer, video_animal_to_explain, show_predicted_class, language], outputs=[video_out], queue=True)
|
86 |
|
87 |
with gr.Row():
|
88 |
with gr.Column():
|
src/results/gradcam_video.mp4
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0bdd085324b0f6cf522ac7fbf79b31c0eca97ae780cd6d0093bb87fc71ad142
|
3 |
+
size 734566
|
src/results/infer_image.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|