GabrielML committed on
Commit 8850972 · 1 Parent(s): c426221
app.py CHANGED
@@ -1,49 +1,80 @@
 import copy
 import os
 import sys
 sys.path.append('src')
 from collections import defaultdict
 from functools import lru_cache

 import gradio as gr
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import torch
 from deep_translator import GoogleTranslator
 from Nets import CustomResNet18
 from PIL import Image
-from torchcam.methods import GradCAM, GradCAMpp, SmoothGradCAMpp, XGradCAM
-from torchcam.utils import overlay_mask
-from torchvision.transforms.functional import to_pil_image
 from tqdm import tqdm
-from util import transform
-from gradio_blocks import build_video_to_camvideo
-import cv2
-import ffmpeg
-import shutil
-import mediapy

 ffmpeg_path = shutil.which('ffmpeg')
 mediapy.set_ffmpeg(ffmpeg_path)

 IMAGE_PATH = os.path.join(os.getcwd(), 'src/examples')
-IMAGES_PER_ROW = 10

 MAXIMAL_FRAMES = 1000
-BATCHES_TO_PROCESS = 15
 OUTPUT_FPS = 10
-MAX_OUT_FRAMES = 70

 CAM_METHODS = {
     "GradCAM": GradCAM,
-    "GradCAM++": GradCAMpp,
     "XGradCAM": XGradCAM,
-    "SmoothGradCAM++": SmoothGradCAMpp,
 }

-model = CustomResNet18(90).eval()
-model.load_state_dict(torch.load('src/results/models/best_model.pth', map_location=torch.device('cpu')))
-cam_model = copy.deepcopy(model)
 data_df = pd.read_csv('src/cache/val_df.csv')

 C_NUM_TO_NAME = data_df[['encoded_target', 'target']].drop_duplicates().sort_values('encoded_target').set_index('encoded_target')['target'].to_dict()
@@ -58,16 +89,19 @@ def get_class_idx(name):

 @lru_cache(maxsize=100)
 def get_translated(to_translate):
-    # return "ssss"
     return GoogleTranslator(source="en", target="de").translate(to_translate)
 for idx in range(90): get_translated(get_class_name(idx))

-def infer_image(image, image_sketch):
-    image = image if image is not None else image_sketch
     image = transform(image)
     image = image.unsqueeze(0)
     with torch.no_grad():
-        output = model(image)
     distribution = torch.nn.functional.softmax(output, dim=1)
     ret = defaultdict(float)
     for idx, prob in enumerate(distribution[0]):
@@ -75,32 +109,51 @@ def infer_image(image, image_sketch):
         ret[animal] = prob.item()
     return ret

-def gradcam(image, image_sketch=None, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class"):
-    image = image if image is not None else image_sketch
-    if layer == 'layer1': layers = [model.resnet.layer1]
-    elif layer == 'layer2': layers = [model.resnet.layer2]
-    elif layer == 'layer3': layers = [model.resnet.layer3]
-    elif layer == 'layer4': layers = [model.resnet.layer4]
-    else: layers = [model.resnet.layer1, model.resnet.layer2, model.resnet.layer3, model.resnet.layer4]
-
-    model.eval()
-    img_tensor = transform(image).unsqueeze(0)
-    cam = CAM_METHODS[cam_method](model, target_layer=layers)
-    output = model(img_tensor)
-    class_to_explain = output.squeeze(0).argmax().item() if specific_class == "Predicted Class" else get_class_idx(specific_class)
-    activation_map = cam(class_to_explain, output)
-    result = overlay_mask(image, to_pil_image(activation_map[0].squeeze(0), mode='F'), alpha=alpha)
-    cam.remove_hooks()
-
-    # # height maximal 300px
-    # if result.size[1] > 300:
-    #     ratio = 300 / result.size[1]
-    #     result = result.resize((int(result.size[0] * ratio), 300))
-    return result


-def gradcam_video(video, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class"):
     global OUTPUT_FPS, MAXIMAL_FRAMES, BATCHES_TO_PROCESS, MAX_OUT_FRAMES
     video = cv2.VideoCapture(video)
     fps = int(video.get(cv2.CAP_PROP_FPS))
     if OUTPUT_FPS == -1: OUTPUT_FPS = fps
@@ -127,36 +180,32 @@ def gradcam_video(video, alpha=0.5, cam_method=GradCAM, layer=None, specific_cla
     print(f'Frames to process: {len(frames)}')

     processed = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in frames]
-    # generate lists in lists for the images for batch processing. 10 images per inner list..
     batched = [processed[i:i + BATCHES_TO_PROCESS] for i in range(0, len(processed), BATCHES_TO_PROCESS)]

-    model.eval()
-    if layer == 'layer1': layers = [model.resnet.layer1]
-    elif layer == 'layer2': layers = [model.resnet.layer2]
-    elif layer == 'layer3': layers = [model.resnet.layer3]
-    elif layer == 'layer4': layers = [model.resnet.layer4]
-    else: layers = [model.resnet.layer1, model.resnet.layer2, model.resnet.layer3, model.resnet.layer4]
-    cam = CAM_METHODS[cam_method](model, target_layer=layers)
     results = list()
-    for i, batch in enumerate(tqdm(batched)):
-        images_tensor = torch.stack([transform(image) for image in batch])
-        outputs = model(images_tensor)
-        out_classes = [output.argmax().item() for output in outputs]
-        classes_to_explain = out_classes if specific_class == "Predicted Class" else [get_class_idx(specific_class)] * len(out_classes)
-        activation_maps = cam(classes_to_explain, outputs)
-        for j, activation_map in enumerate(activation_maps[0]):
-            result = overlay_mask(batch[j], to_pil_image(activation_map, mode='F'), alpha=alpha)
-            results.append(cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR))
-    cam.remove_hooks()

     # save video
-    # fourcc = cv2.VideoWriter_fourcc(*'AVC1')
-    # fourcc = cv2.VideoWriter_fourcc(*'MP4V')
-    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
-    # size = (results[0].shape[1], results[0].shape[0])
-    # video = cv2.VideoWriter('src/results/gradcam_video.mp4', fourcc, OUTPUT_FPS, size)
-    # for frame in results:
-    #     video.write(frame)
     mediapy.write_video('src/results/gradcam_video.mp4', results, fps=OUTPUT_FPS)
     video.release()
     return 'src/results/gradcam_video.mp4'
@@ -190,10 +239,15 @@ def load_examples():
     for j in range(IMAGES_PER_ROW):
         if i * IMAGES_PER_ROW + j >= len(images_to_load): break
         image = images_to_load[i * IMAGES_PER_ROW + j]
         loaded_images[image_type].append(
             gr.Image(
-                value=os.path.join(full_path, image),
-                label=f"image ({get_translated(image.split('.')[0])})",
                 type="pil",
                 interactive=False,
                 elem_classes=["selectable_images"],
@@ -224,22 +278,13 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
     # INPUT IMAGE
     # -------------------------------------------
     with gr.Row():
-        with gr.Tab("Upload Image"):
-            with gr.Row(variant="panel", equal_height=True):
-                user_image = gr.Image(
-                    type="pil",
-                    label="Upload Your Own Image",
-                    info="You can also upload your own image for prediction.",
-                )
-        with gr.Tab("Draw Image"):
-            with gr.Row(variant="panel", equal_height=True):
-                user_image_sketched = gr.Image(
-                    type="pil",
-                    source="canvas",
-                    tool="color-sketch",
-                    label="Draw Your Own Image",
-                    info="You can also draw your own image for prediction.",
-                )

     # -------------------------------------------
     # TOOLS
@@ -257,7 +302,7 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
         scale=5,
     )
     predict_mode_button = gr.Button(value="Predict Animal", label="Predict", info="Click to make a prediction.", scale=1)
-    predict_mode_button.click(fn=infer_image, inputs=[user_image, user_image_sketched], outputs=output, queue=True)

     # -------------------------------------------
     # EXPLAIN
@@ -265,16 +310,20 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
     with gr.Tab("Explain Image"):
         with gr.Row():
             with gr.Column():
                 cam_method = gr.Radio(
                     list(CAM_METHODS.keys()),
                     label="GradCAM Method",
                     value="GradCAM",
                     interactive=True,
                     scale=2,
                 )
-                cam_method.description = "Here you can choose the GradCAM method."
-                cam_method.description_place = "left"

                 alpha = gr.Slider(
                     minimum=.1,
                     maximum=.9,
@@ -283,46 +332,99 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
                     step=.1,
                     label="Alpha",
                     scale=1,
                 )
-                alpha.description = "Here you can choose the alpha value."
-                alpha.description_place = "left"

                 layer = gr.Radio(
-                    ["layer1", "layer2", "layer3", "layer4", "all"],
                     label="Layer",
                     value="layer4",
                     interactive=True,
                     scale=2,
                 )
-                layer.description = "Here you can choose the layer to visualize."
-                layer.description_place = "left"

                 animal_to_explain = gr.Dropdown(
                     choices=["Predicted Class"] + ALL_CLASSES,
                     label="Animal",
                     value="Predicted Class",
                     interactive=True,
                     scale=2,
                 )
-                animal_to_explain.description = "Here you can choose the animal to explain. If you choose 'Predicted Class' the method will explain the predicted class."
-                animal_to_explain.description_place = "center"

             with gr.Column():
                 output_cam = gr.Image(
                     type="pil",
                     label="GradCAM",
-                    info="GradCAM visualization"
-
                 )

         gradcam_mode_button = gr.Button(value="Show GradCAM", label="GradCAM", info="Click to make a prediction.", scale=1)
-        gradcam_mode_button.click(fn=gradcam, inputs=[user_image, user_image_sketched, alpha, cam_method, layer, animal_to_explain], outputs=output_cam, queue=True)

     # -------------------------------------------
     # Video CAM
     # -------------------------------------------
     with gr.Tab("Explain Video"):
-        build_video_to_camvideo(CAM_METHODS, ALL_CLASSES, gradcam_video)

     # -------------------------------------------
     # EXAMPLES
 
 import copy
 import os
 import sys
+
 sys.path.append('src')
+import shutil
 from collections import defaultdict
 from functools import lru_cache
+
+import cv2
 import gradio as gr
+import mediapy
 import numpy as np
 import pandas as pd
 import torch
 from deep_translator import GoogleTranslator
+from gradio_blocks import build_video_to_camvideo
 from Nets import CustomResNet18
 from PIL import Image
+
+from pytorch_grad_cam import GradCAM, HiResCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
+from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+from pytorch_grad_cam.utils.image import show_cam_on_image
+
 from tqdm import tqdm
+import util
+from util import transform, CustomImageCache, imageCacheWrapper

+util.ImageCache = CustomImageCache(60, False)
 ffmpeg_path = shutil.which('ffmpeg')
 mediapy.set_ffmpeg(ffmpeg_path)

 IMAGE_PATH = os.path.join(os.getcwd(), 'src/examples')
+IMAGES_PER_ROW = 5

 MAXIMAL_FRAMES = 1000
+BATCHES_TO_PROCESS = 20
 OUTPUT_FPS = 10
+MAX_OUT_FRAMES = 60
+
+MODEL = CustomResNet18(90).eval()
+MODEL.load_state_dict(torch.load('src/results/models/best_model.pth', map_location=torch.device('cpu')))

 CAM_METHODS = {
     "GradCAM": GradCAM,
+    "GradCAM++": GradCAMPlusPlus,
     "XGradCAM": XGradCAM,
+    "HiResCAM": HiResCAM,
+    "EigenCAM": EigenCAM
+}
+
+# every entry is a list, since pytorch_grad_cam expects target_layers as a list of modules
+LAYERS = {
+    'layer1': [MODEL.resnet.layer1],
+    'layer2': [MODEL.resnet.layer2],
+    'layer3': [MODEL.resnet.layer3],
+    'layer4': [MODEL.resnet.layer4],
+    'all': [MODEL.resnet.layer1, MODEL.resnet.layer2, MODEL.resnet.layer3, MODEL.resnet.layer4],
+    'layer3+4': [MODEL.resnet.layer3, MODEL.resnet.layer4]
+}
+
+CV2_COLORMAPS = {
+    "Autumn": cv2.COLORMAP_AUTUMN,
+    "Bone": cv2.COLORMAP_BONE,
+    "Jet": cv2.COLORMAP_JET,
+    "Winter": cv2.COLORMAP_WINTER,
+    "Rainbow": cv2.COLORMAP_RAINBOW,
+    "Ocean": cv2.COLORMAP_OCEAN,
+    "Summer": cv2.COLORMAP_SUMMER,
+    "Pink": cv2.COLORMAP_PINK,
+    "Hot": cv2.COLORMAP_HOT,
+    "Magma": cv2.COLORMAP_MAGMA,
+    "Inferno": cv2.COLORMAP_INFERNO,
+    "Plasma": cv2.COLORMAP_PLASMA,
+    "Twilight": cv2.COLORMAP_TWILIGHT,
 }

+# cam_model = copy.deepcopy(model)
 data_df = pd.read_csv('src/cache/val_df.csv')

 C_NUM_TO_NAME = data_df[['encoded_target', 'target']].drop_duplicates().sort_values('encoded_target').set_index('encoded_target')['target'].to_dict()

 @lru_cache(maxsize=100)
 def get_translated(to_translate):
     return GoogleTranslator(source="en", target="de").translate(to_translate)
 for idx in range(90): get_translated(get_class_name(idx))

+@imageCacheWrapper
+def infer_image(image):
+    if isinstance(image, dict):
+        # it's the image plus a drawn mask (both PIL) -> blend them into one image
+        image = Image.blend(image["image"], image["mask"], alpha=0.5)
+    image.save('src/results/infer_image.png')
     image = transform(image)
     image = image.unsqueeze(0)
     with torch.no_grad():
+        output = MODEL(image)
     distribution = torch.nn.functional.softmax(output, dim=1)
     ret = defaultdict(float)
     for idx, prob in enumerate(distribution[0]):

         ret[animal] = prob.item()
     return ret

+def gradcam(image, colormap="Jet", use_eigen_smooth=False, use_aug_smooth=False, BWHighlight=False, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class"):
+    if image is None:
+        raise gr.Error("Please upload an image.")
+
+    if isinstance(image, dict):
+        # it's the image plus a drawn mask (both PIL) -> blend them into one image
+        image = Image.blend(image["image"], image["mask"], alpha=0.5)
+
+    if colormap not in CV2_COLORMAPS.keys():
+        raise gr.Error(f"Colormap {colormap} not found in {list(CV2_COLORMAPS.keys())}.")
+    else:
+        colormap = CV2_COLORMAPS[colormap]
+
+    image_width, image_height = image.size
+    if image_width > 4000 or image_height > 4000:
+        raise gr.Error("The image is too big. The maximal size is 4000x4000.")
+
+    MODEL.eval()
+    layers = LAYERS[layer]
+
+    image_tensor = transform(image).unsqueeze(0)
+    targets = [ClassifierOutputTarget(get_class_idx(specific_class))] if specific_class != "Predicted Class" else None
+
+    with CAM_METHODS[cam_method](model=MODEL, target_layers=layers) as cam:
+        grayscale_cam = cam(input_tensor=image_tensor, targets=targets, aug_smooth=use_aug_smooth, eigen_smooth=use_eigen_smooth)
+
+    grayscale_cam = grayscale_cam[0, :]
+    grayscale_cam = cv2.resize(grayscale_cam, (image_width, image_height), interpolation=cv2.INTER_CUBIC)
+    image = np.float32(image)
+    visualization = None
+    if BWHighlight:
+        # keep the original pixels, scaled by CAM importance
+        image = image * grayscale_cam[..., np.newaxis]
+        visualization = image.astype(np.uint8)
+    else:
+        image = image / 255
+        visualization = show_cam_on_image(image, grayscale_cam, use_rgb=True, image_weight=alpha, colormap=colormap)
+    return Image.fromarray(visualization)

+def gradcam_video(video, colormap="Jet", use_eigen_smooth=False, BWHighlight=False, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class"):
     global OUTPUT_FPS, MAXIMAL_FRAMES, BATCHES_TO_PROCESS, MAX_OUT_FRAMES
+    if colormap not in CV2_COLORMAPS.keys():
+        raise gr.Error(f"Colormap {colormap} not found in {list(CV2_COLORMAPS.keys())}.")
+    else:
+        colormap = CV2_COLORMAPS[colormap]
     video = cv2.VideoCapture(video)
     fps = int(video.get(cv2.CAP_PROP_FPS))
     if OUTPUT_FPS == -1: OUTPUT_FPS = fps
 
     print(f'Frames to process: {len(frames)}')

     processed = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in frames]
+    # generate lists in lists for the images for batch processing: BATCHES_TO_PROCESS images per inner list
     batched = [processed[i:i + BATCHES_TO_PROCESS] for i in range(0, len(processed), BATCHES_TO_PROCESS)]

+    MODEL.eval()
+    layers = LAYERS[layer]
     results = list()
+    targets = [ClassifierOutputTarget(get_class_idx(specific_class))] if specific_class != "Predicted Class" else None
+    with CAM_METHODS[cam_method](model=MODEL, target_layers=layers) as cam:
+        for i, batch in enumerate(tqdm(batched)):
+            images_tensor = torch.stack([transform(image) for image in batch])
+
+            grayscale_cam = cam(input_tensor=images_tensor, targets=targets, aug_smooth=False, eigen_smooth=use_eigen_smooth)
+            for j, image in enumerate(batch):
+                _grayscale_cam = grayscale_cam[j, :]
+                _grayscale_cam = cv2.resize(_grayscale_cam, (width, height), interpolation=cv2.INTER_LINEAR)
+                image = np.float32(image)
+                visualization = None
+                if BWHighlight:
+                    image = image * _grayscale_cam[..., np.newaxis]
+                    visualization = image.astype(np.uint8)
+                else:
+                    image = image / 255
+                    visualization = show_cam_on_image(image, _grayscale_cam, use_rgb=True, image_weight=alpha, colormap=colormap)
+                results.append(visualization)

     # save video
     mediapy.write_video('src/results/gradcam_video.mp4', results, fps=OUTPUT_FPS)
     video.release()
     return 'src/results/gradcam_video.mp4'
 
     for j in range(IMAGES_PER_ROW):
         if i * IMAGES_PER_ROW + j >= len(images_to_load): break
         image = images_to_load[i * IMAGES_PER_ROW + j]
+        name = f"{image.split('.')[0]} ({get_translated(image.split('.')[0])})"
+        image = Image.open(os.path.join(full_path, image))
+        # scale so that the longest side is 600px
+        scale = 600 / max(image.size)
+        image = image.resize((int(image.size[0] * scale), int(image.size[1] * scale)))
         loaded_images[image_type].append(
             gr.Image(
+                value=image,
+                label=name,
                 type="pil",
                 interactive=False,
                 elem_classes=["selectable_images"],
 
     # INPUT IMAGE
     # -------------------------------------------
     with gr.Row():
+        with gr.Row(variant="panel", equal_height=True):
+            user_image = gr.Image(
+                type="pil",
+                label="Upload Your Own Image",
+                tool="sketch",
+                interactive=True,
+            )

     # -------------------------------------------
     # TOOLS

         scale=5,
     )
     predict_mode_button = gr.Button(value="Predict Animal", label="Predict", info="Click to make a prediction.", scale=1)
+    predict_mode_button.click(fn=infer_image, inputs=[user_image], outputs=output, queue=True)

     # -------------------------------------------
     # EXPLAIN

     with gr.Tab("Explain Image"):
         with gr.Row():
             with gr.Column():
+                _info = "There are different GradCAM methods. You can read more about them here: https://github.com/jacobgil/pytorch-grad-cam#references"
                 cam_method = gr.Radio(
                     list(CAM_METHODS.keys()),
                     label="GradCAM Method",
+                    info=_info,
                     value="GradCAM",
                     interactive=True,
                     scale=2,
                 )

+                _info = """
+                The alpha value blends the original image with the GradCAM visualization. At 0.5 both are weighted equally;
+                at 0.1 the original image is barely visible, and at 0.9 the GradCAM visualization is barely visible.
+                """
                 alpha = gr.Slider(
                     minimum=.1,
                     maximum=.9,

                     step=.1,
                     label="Alpha",
                     scale=1,
+                    info=_info
                 )

+                _info = """
+                The layer determines which layer of the ResNet backbone the GradCAM visualization is based on.
+                The last layer (layer4) is usually the best choice: it carries the most information right before the final prediction, which makes the visualization the most meaningful.
+                If several layers are chosen, the GradCAM visualization is averaged over them.
+                """
                 layer = gr.Radio(
+                    list(LAYERS.keys()),
                     label="Layer",
                     value="layer4",
                     interactive=True,
                     scale=2,
+                    info=_info
                 )

+                _info = """
+                Here you can choose the animal to "explain". If you choose "Predicted Class" the GradCAM visualization is based on the predicted class;
+                if you choose a specific class, it is based on that class.
+                For example, on an image with both a dog and a cat you can select either Cat or Dog and see whether the model focuses on the correct animal.
+                """
                 animal_to_explain = gr.Dropdown(
                     choices=["Predicted Class"] + ALL_CLASSES,
                     label="Animal",
                     value="Predicted Class",
                     interactive=True,
                     scale=2,
+                    info=_info
                 )
+
+                with gr.Row():
+                    _info = """
+                    Here you can choose the colormap. Instead of a colormap you can also choose "BW Highlight" to keep the original image and only emphasize its important parts.
+                    If you select "BW Highlight" the colormap is ignored.
+                    """
+                    colormap = gr.Dropdown(
+                        choices=list(CV2_COLORMAPS.keys()),
+                        label="Colormap",
+                        value="Jet",
+                        interactive=True,
+                        scale=2,
+                        info=_info
+                    )
+
+                    bw_highlight = gr.Checkbox(
+                        label="BW Highlight",
+                        value=False,
+                        interactive=True,
+                        scale=1,
+                        info="Keep the original image and highlight only its important parts instead of overlaying a colormap."
+                    )
+
+                with gr.Row():
+                    _info = """
+                    Eigen Smooth reduces noise in the GradCAM visualization.
+                    """
+                    use_eigen_smooth = gr.Checkbox(
+                        label="Eigen Smooth",
+                        value=False,
+                        interactive=True,
+                        scale=1,
+                        info=_info
+                    )
+                    _info = """
+                    Aug Smooth also smooths the GradCAM visualization, but it needs a lot of compute and is therefore slow.
+                    """
+                    use_aug_smooth = gr.Checkbox(
+                        label="Aug Smooth",
+                        value=False,
+                        interactive=True,
+                        scale=1,
+                        info=_info
+                    )

             with gr.Column():
                 output_cam = gr.Image(
                     type="pil",
                     label="GradCAM",
+                    info="GradCAM visualization",
+                    scale=5,
                 )

         gradcam_mode_button = gr.Button(value="Show GradCAM", label="GradCAM", info="Click to make a prediction.", scale=1)
+        gradcam_mode_button.click(fn=gradcam, inputs=[user_image, colormap, use_eigen_smooth, use_aug_smooth, bw_highlight, alpha, cam_method, layer, animal_to_explain], outputs=output_cam, queue=True)

     # -------------------------------------------
     # Video CAM
     # -------------------------------------------
     with gr.Tab("Explain Video"):
+        build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gradcam_video)

     # -------------------------------------------
     # EXAMPLES
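For orientation, the rewritten `gradcam()` above follows the standard pytorch-grad-cam recipe. Here is a minimal, self-contained sketch of that recipe; the torchvision `resnet18`, the image path, and the ImageNet class index are stand-ins of mine, not values from this commit:

```python
import numpy as np
from PIL import Image
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from torchvision import transforms
from torchvision.models import ResNet18_Weights, resnet18

model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1).eval()
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

image = Image.open("cat.jpg").convert("RGB")   # any RGB test image (placeholder path)
input_tensor = preprocess(image).unsqueeze(0)  # shape (1, 3, 224, 224)

# targets=None explains the model's own top prediction; a ClassifierOutputTarget
# pins the explanation to one class, like the "Animal" dropdown does.
targets = [ClassifierOutputTarget(281)]        # 281 = "tabby cat" in ImageNet

with GradCAM(model=model, target_layers=[model.layer4]) as cam:
    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]  # (224, 224) floats in [0, 1]

# blend the heatmap over the resized, [0, 1]-scaled image; image_weight is the Alpha slider
rgb = np.float32(image.resize((224, 224))) / 255
overlay = show_cam_on_image(rgb, grayscale_cam, use_rgb=True, image_weight=0.5)
Image.fromarray(overlay).save("cam_overlay.png")
```

The context manager releases the CAM's hooks on exit, which is why the new code no longer needs the explicit `cam.remove_hooks()` call from the torchcam version.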
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ

requirements_old.txt ADDED
Binary file (4.01 kB).
src/example_videos/jellyfish_-_110877 (360p).mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:53e45f6bd34aaeefdabfeddc9d70a4d7670a183137f2469db42f4f90e73ea296
-size 797977

src/example_videos/monarch_-_327 (360p).mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d0c163fd1ca2ec280c13430f8606d14e8c490980d66b63610ccf3e5af581138e
-size 428449

src/example_videos/pexels-zlatin-georgiev-5607745 (240p).mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:02d82cff5c3c4eb51edd6f07e64e33116cd6889ad45dfb9521aa89406022c539
-size 470993

src/example_videos/pexels-zlatin-georgiev-7173031 (240p).mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6931230df61b43d063905f96e313ea47ac3b3ca16fd5d749bcd167b07d83ee69
-size 268559

src/example_videos/pexels_videos_2556839 (240p).mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:065667e0d791af82df46c2b7667f17d5a0687fce606fc7e5c216a0e9c3045f76
-size 402447

src/example_videos/squirrel_on_a_wood (360p).mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9af343138b0a589ccee2c9469f97ce55cbee720ffec9084429f33ae1e37e4f12
-size 2006082
src/examples/AI_Generated/goat (2).png DELETED
Git LFS Details
  • SHA256: e1346e0c71f880f274de273ed6eee5e8ad2bc7b2d767560459192d5de9bec8b8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.99 MB

src/examples/AI_Generated/koala.png DELETED
Git LFS Details
  • SHA256: 9ac3ad3beab6456614f297254589507af1df3191db8bfcbad26924c5d996e831
  • Pointer size: 132 Bytes
  • Size of remote file: 1.65 MB

src/examples/AI_Generated/rabbit.png DELETED
Git LFS Details
  • SHA256: d376c67d72e403102393e685459084d45d6b804caac42348aa712dc646075796
  • Pointer size: 132 Bytes
  • Size of remote file: 2.36 MB

src/examples/AI_Generated/rhinoceros.png DELETED
Git LFS Details
  • SHA256: 2aac1990b1b6a835100ae9f75824fb84f4a8e322aa8e7f73d67f89506c3330d2
  • Pointer size: 132 Bytes
  • Size of remote file: 2.75 MB

src/examples/AI_Generated/swan.png DELETED
Git LFS Details
  • SHA256: 5b0aa528e07a8a78f3bc4c610a40f46b769e7b540c9f228d4027d3288a2edda1
  • Pointer size: 132 Bytes
  • Size of remote file: 2.1 MB

src/examples/AI_Generated/woodpecker.png DELETED
Git LFS Details
  • SHA256: 98eae49bd0c23630df30be211d899b1cc39fd6877c7dcb8dfc8d1cf1b6f1e8b9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.16 MB

src/examples/false_predicted/boar.jpg DELETED
Git LFS Details
  • SHA256: a5ebd92e69975a55c6568f50f642fef14609efaad4fc3e32978b2057ced39f96
  • Pointer size: 130 Bytes
  • Size of remote file: 21.6 kB

src/examples/false_predicted/dolphin.jpg DELETED
Git LFS Details
  • SHA256: cc0c0d4ba9df1df29c21e04b642f2d5cb597489bcb62b90c3b73bed01286de2c
  • Pointer size: 129 Bytes
  • Size of remote file: 3.39 kB

src/examples/false_predicted/horse.jpg DELETED
Git LFS Details
  • SHA256: ab44868a2558e57b36c83af01cecf2c427b9403f9eec550898f7c767a7c4af1c
  • Pointer size: 131 Bytes
  • Size of remote file: 138 kB

src/examples/false_predicted/sparrow.jpg DELETED
Git LFS Details
  • SHA256: 4b09f1b5809696b10847e0c8b5c0e2d4febbe8cc8eb81f4493316c7c7e91d048
  • Pointer size: 129 Bytes
  • Size of remote file: 7.96 kB

src/examples/others/Tiger-fuers-Wohnzimmer-In-Hybridkatzen-steckt-ein-Stueck-Wildnis-2.jpg DELETED
Git LFS Details
  • SHA256: a94bef5b6320925e4d750dc7109b608ca5eb512736078dfaeeac48f6cb070216
  • Pointer size: 131 Bytes
  • Size of remote file: 193 kB

src/examples/true_predicted/dragonfly.jpg DELETED
Git LFS Details
  • SHA256: d98a75d0aa449d80f180f561bc075b03b61b7a2a22d071e100144012431032f6
  • Pointer size: 131 Bytes
  • Size of remote file: 104 kB

src/examples/true_predicted/goat.jpg DELETED
Git LFS Details
  • SHA256: 6c718b96f73fc30732401210e5b0b586ccb22d366c8d623a57e3bc3eccbceb86
  • Pointer size: 129 Bytes
  • Size of remote file: 9.4 kB

src/examples/true_predicted/panda.jpg DELETED
Git LFS Details
  • SHA256: 66e8398d083eb047a6b92eefee2bbd113786e6858a2f5b5e491e820603ad8b8d
  • Pointer size: 130 Bytes
  • Size of remote file: 14.5 kB

src/examples/true_predicted/rat.jpg DELETED
Git LFS Details
  • SHA256: a77b75dcd4151cd85fb5c57e134cb45c03632d83e6517d81862f95659786e437
  • Pointer size: 130 Bytes
  • Size of remote file: 71.4 kB

src/examples/true_predicted/wombat.jpg DELETED
Git LFS Details
  • SHA256: aed677c70afef65f65950969e652f7fc1ef9ccc10e413719a5a4e65f8f153e6e
  • Pointer size: 130 Bytes
  • Size of remote file: 14.8 kB
src/gradio_blocks.py CHANGED
@@ -3,12 +3,12 @@ import os

 VIDEOS_PER_ROW = 3
 VIDEO_EXAMPLES_PATH = "src/example_videos"
-def build_video_to_camvideo(CAM_METHODS, ALL_CLASSES, gradcam_video):
     with gr.Row():
         with gr.Column(scale=2):
             gr.Markdown("### Video to GradCAM-Video")
             gr.Markdown("Here you can upload a video and visualize the GradCAM.")
-            gr.Markdown("Please note that this can take a while. Also currently only a maximum of 70 frames can be processed. The video will be cut to 70 frames if it is longer. Furthermore, the video can only consist of a maximum of 1000.")
             gr.Markdown("The more frames and fps the video has, the longer it takes to process and the result will be more choppy.")
             video_cam_method = gr.Radio(
                 ["GradCAM", "GradCAM++"],
@@ -17,8 +17,6 @@ def build_video_to_camvideo(CAM_METHODS, ALL_CLASSES, gradcam_video):
                 interactive=True,
                 scale=2,
             )
-            video_cam_method.description = "Here you can choose the GradCAM method."
-            video_cam_method.description_place = "left"

             video_alpha = gr.Slider(
                 minimum=.1,
@@ -29,35 +27,54 @@ def build_video_to_camvideo(CAM_METHODS, ALL_CLASSES, gradcam_video):
                 label="Alpha",
                 scale=1,
             )
-            video_alpha.description = "Here you can choose the alpha value."
-            video_alpha.description_place = "left"

             video_layer = gr.Radio(
-                ["layer1", "layer2", "layer3", "layer4", "all"],
-                label="Layer",
-                value="layer4",
                 interactive=True,
                 scale=2,
             )
-            video_layer.description = "Here you can choose the layer to visualize."
-            video_layer.description_place = "left"

-            video_animal_to_explain = gr.Dropdown(
-                choices=["Predicted Class"] + ALL_CLASSES,
-                label="Animal",
-                value="Predicted Class",
                 interactive=True,
-                scale=2,
             )
-            video_animal_to_explain.description = "Here you can choose the animal to explain. If you choose 'Predicted Class' the method will explain the predicted class."
-            video_animal_to_explain.description_place = "center"

         with gr.Column(scale=1):
             with gr.Column():
                 video_in = gr.Video(autoplay=False, include_audio=False)
                 video_out = gr.Video(autoplay=False, include_audio=False)

         gif_cam_mode_button = gr.Button(value="Show GradCAM-Video", label="GradCAM", scale=1)
-        gif_cam_mode_button.click(fn=gradcam_video, inputs=[video_in, video_alpha, video_cam_method, video_layer, video_animal_to_explain], outputs=[video_out], queue=True)

         with gr.Row():
             with gr.Column():
 

 VIDEOS_PER_ROW = 3
 VIDEO_EXAMPLES_PATH = "src/example_videos"
+def build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gradcam_video):
     with gr.Row():
         with gr.Column(scale=2):
             gr.Markdown("### Video to GradCAM-Video")
             gr.Markdown("Here you can upload a video and visualize the GradCAM.")
+            gr.Markdown("Please note that this can take a while. Also, currently only a maximum of 60 frames can be processed; longer videos will be cut to 60 frames. Furthermore, the video may consist of at most 1000 frames in total.")
             gr.Markdown("The more frames and fps the video has, the longer it takes to process and the result will be more choppy.")
             video_cam_method = gr.Radio(
                 ["GradCAM", "GradCAM++"],

                 interactive=True,
                 scale=2,
             )

             video_alpha = gr.Slider(
                 minimum=.1,

                 label="Alpha",
                 scale=1,
             )

             video_layer = gr.Radio(
+                list(LAYERS.keys()),
+                label="Layer",
+                value="layer4",
+                interactive=True,
+                scale=2,
+            )
+
+            video_animal_to_explain = gr.Dropdown(
+                choices=["Predicted Class"] + ALL_CLASSES,
+                label="Animal",
+                value="Predicted Class",
+                interactive=True,
+                scale=2,
+            )
+
+            with gr.Row():
+                colormap = gr.Dropdown(
+                    choices=list(CV2_COLORMAPS.keys()),
+                    label="Colormap",
+                    value="Jet",
                     interactive=True,
                     scale=2,
                 )

+                bw_highlight = gr.Checkbox(
+                    label="BW Highlight",
+                    value=False,
                     interactive=True,
+                    scale=1,
                 )
+
+            with gr.Row():
+                use_eigen_smooth = gr.Checkbox(
+                    label="Eigen Smooth",
+                    value=False,
+                    interactive=True,
+                    scale=1,
+                )
+
         with gr.Column(scale=1):
             with gr.Column():
                 video_in = gr.Video(autoplay=False, include_audio=False)
                 video_out = gr.Video(autoplay=False, include_audio=False)

         gif_cam_mode_button = gr.Button(value="Show GradCAM-Video", label="GradCAM", scale=1)
+        gif_cam_mode_button.click(fn=gradcam_video, inputs=[video_in, colormap, use_eigen_smooth, bw_highlight, video_alpha, video_cam_method, video_layer, video_animal_to_explain], outputs=[video_out], queue=True)

         with gr.Row():
             with gr.Column():
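One detail worth knowing about the `.click(...)` wiring used throughout these files: Gradio passes component values to the callback positionally, in the order listed in `inputs`, so the callback signature must line up with that list exactly. A toy sketch of the same pattern (components and names here are illustrative, not from this repo):

```python
import gradio as gr

def greet(name, excited):
    # values arrive positionally: name from the Textbox, excited from the Checkbox
    return f"Hello {name}{'!' if excited else '.'}"

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    excited = gr.Checkbox(label="Excited")
    out = gr.Textbox(label="Greeting")
    btn = gr.Button("Greet")
    btn.click(fn=greet, inputs=[name, excited], outputs=out, queue=True)

# demo.launch()
```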
src/header.md CHANGED
@@ -2,15 +2,17 @@

 This project was created by [Ilyesse](https://github.com/ilyii) and [Gabriel](https://github.com/Gabriel9753) as part of the Explainable Machine Learning module at the [University of Applied Sciences Karlsruhe](https://www.h-ka.de/).

-The dataset used in this project is the [Animal Image Dataset](https://www.kaggle.com/datasets/iamsouravbanerjee/animal-image-dataset-90-different-animals) from Kaggle, comprising 90 different animal species that needed to be classified.

-The employed model is ResNet18, which was trained on the dataset using transfer learning techniques.
 Translation of animal names by [deep-translator](https://pypi.org/project/deep-translator/).

 ## Usage 🦎

-**Predict:** In the "Predict" tab, the model can be applied to high-resolution images to predict the species among the 90 different animals.

-**Explain:** Under the "Explain" tab, the model can be applied to high-resolution images to obtain an explanation for the prediction. This explanation is generated using the [Grad-CAM](https://github.com/frgfm/torch-cam.git) method.

-**Example Images:** The "Example Images" section allows users to view sample images from the dataset. These images can be utilized as input by simply dragging and dropping them onto the interface. It is important to note that these example images were not part of the training data used for the model.

 This project was created by [Ilyesse](https://github.com/ilyii) and [Gabriel](https://github.com/Gabriel9753) as part of the Explainable Machine Learning module at the [University of Applied Sciences Karlsruhe](https://www.h-ka.de/).

+The dataset used in this project is the [Animal Image Dataset](https://www.kaggle.com/datasets/iamsouravbanerjee/animal-image-dataset-90-different-animals) from Kaggle, comprising 90 different animal species that needed to be classified. We also added approx. 1000 AI-generated images across all classes to make the dataset more diverse and to improve the performance of the model.

+The employed model is ResNet50, which was trained on the dataset using transfer learning techniques.
 Translation of animal names by [deep-translator](https://pypi.org/project/deep-translator/).

 ## Usage 🦎

+**Predict:** In the "Predict" tab, the model can be applied to the uploaded image to obtain a prediction. This is also useful for finding the animal class to use in the explanation that follows.

+**Explain Image:** Under the "Explain Image" tab, you can get an explanation of the prediction in the form of a generated heatmap. We are using [this](https://github.com/jacobgil/pytorch-grad-cam) cool implementation of Grad-CAM to generate the heatmaps!

+**Explain Video:** The same as above, but for short videos. The video is split into frames and the model is applied to each frame. The resulting heatmaps are then combined into a video again.
+
+**Example Images:** The "Example Images" section allows users to view sample images from the dataset and other sources. Some of the images and videos are from [1](https://www.pexels.com/), [2](https://pixabay.com/) and [3](https://www.bing.com/create).
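The frame-split/recombine loop described under "Explain Video" reduces to a few lines. A minimal sketch, assuming OpenCV and mediapy as used in app.py; `explain_frame()` is a hypothetical stand-in for the per-frame Grad-CAM overlay, not a function from this repo:

```python
import cv2
import mediapy

def explain_frame(rgb_frame):
    # hypothetical stand-in for the per-frame Grad-CAM overlay in app.py
    return rgb_frame

video = cv2.VideoCapture("input.mp4")  # any short clip (placeholder path)
fps = int(video.get(cv2.CAP_PROP_FPS))
frames = []
while True:
    ok, frame = video.read()           # frames arrive in BGR order
    if not ok:
        break
    frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
video.release()

results = [explain_frame(f) for f in frames]          # one heatmap overlay per frame
mediapy.write_video("output.mp4", results, fps=fps)   # recombine into a video
```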
src/results/gradcam_video.mp4 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:935c594a0ecbc14565723ad3896989aaaa6021232d368bf1cda5f8e9c0bf9e74
-size 922461
+oid sha256:a9617d53ad717194350c99f6b1d2a172f01e712e4109c76b16fe3f70f32c4570
+size 772080
src/{example_videos/butterfly_-_38947 (360p).mp4 → results/infer_image.png} RENAMED
File without changes
src/util.py CHANGED
@@ -4,7 +4,8 @@ from sklearn.preprocessing import LabelEncoder
 from tqdm import tqdm
 from PIL import Image
 import torch
-

 class AnimalDataset(Dataset):
     def __init__(self, df, transform=None):
@@ -41,3 +42,42 @@ transform = transforms.Compose([
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
 ])

 from tqdm import tqdm
 from PIL import Image
 import torch
+import imagehash
+ImageCache = None

 class AnimalDataset(Dataset):
     def __init__(self, df, transform=None):

     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
 ])

+class CustomImageCache:
+    def __init__(self, cache_size=50, debug=False):
+        self.cache = dict()
+        self.cache_size = cache_size
+        self.debug = debug
+        self.cache_hits = 0
+        self.cache_misses = 0
+
+    def __getitem__(self, image):
+        if isinstance(image, dict):
+            # it's the image plus a drawn mask (both PIL) -> blend them into one image
+            image = Image.blend(image["image"], image["mask"], alpha=0.5)
+        key = imagehash.average_hash(image)
+
+        if key in self.cache:
+            if self.debug: print("Cache hit!")
+            self.cache_hits += 1
+            return self.cache[key]
+        else:
+            if self.debug: print("Cache miss!")
+            self.cache_misses += 1
+            if len(self.cache.keys()) >= self.cache_size:
+                if self.debug: print("Cache full, popping item!")
+                self.cache.popitem()
+            self.cache[key] = image
+            return self.cache[key]
+
+    def __len__(self):
+        return len(self.cache.keys())
+
+    def print_info(self):
+        print(f"Cache size: {len(self)}")
+        print(f"Cache hits: {self.cache_hits}")
+        print(f"Cache misses: {self.cache_misses}")
+
+def imageCacheWrapper(fn):
+    def wrapper(image):
+        return fn(ImageCache[image])
+    return wrapper
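A minimal usage sketch for the new cache, assuming `imagehash` and Pillow are installed; `describe` is a hypothetical stand-in for `infer_image`. Note the design choice: the cache deduplicates the input image, keyed by its perceptual hash, rather than memoizing model outputs, so inference still runs on every call:

```python
import util
from util import CustomImageCache, imageCacheWrapper
from PIL import Image

# app.py assigns the module-level cache before any wrapped function runs
util.ImageCache = CustomImageCache(cache_size=60, debug=True)

@imageCacheWrapper
def describe(image):
    # hypothetical stand-in for infer_image
    return image.size

img = Image.new("RGB", (224, 224), "white")
print(describe(img))          # "Cache miss!" -> (224, 224)
print(describe(img))          # "Cache hit!"  -> same cached image
util.ImageCache.print_info()  # size 1, 1 hit, 1 miss
```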