ChayanDeb committed · commit 88d7988 · verified · 1 parent: c013eea

Upload 82 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +4 -0
  2. AURA-CXR-Logo.png +0 -0
  3. CAM-Result/gradcam_result.png +3 -0
  4. Chest_Xray_Report_Generator-Web-V2.py +537 -0
  5. Feedback/feedback.txt +0 -0
  6. Model/config.json +0 -0
  7. Model/generation_config.json +6 -0
  8. Model/model.safetensors +3 -0
  9. Model/preprocessor_config.json +22 -0
  10. Model/rng_state.pth +3 -0
  11. Model/scheduler.pt +3 -0
  12. Model/special_tokens_map.json +6 -0
  13. Model/tokenizer.json +0 -0
  14. Model/tokenizer_config.json +20 -0
  15. Model/trainer_state.json +536 -0
  16. Model/training_args.bin +3 -0
  17. Model/vocab.json +0 -0
  18. Test-Images/0d930f0a-46f813a9-db3b137b-05142eef-eca3c5a7.jpg +3 -0
  19. Test-Images/6ff741e9-6ea01eef-1bf10153-d1b6beba-590b6620.jpg +3 -0
  20. Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg +3 -0
  21. pytorch_grad_cam/Readme.md +29 -0
  22. pytorch_grad_cam/__init__.py +20 -0
  23. pytorch_grad_cam/__pycache__/__init__.cpython-39.pyc +0 -0
  24. pytorch_grad_cam/__pycache__/ablation_cam.cpython-39.pyc +0 -0
  25. pytorch_grad_cam/__pycache__/ablation_layer.cpython-39.pyc +0 -0
  26. pytorch_grad_cam/__pycache__/activations_and_gradients.cpython-39.pyc +0 -0
  27. pytorch_grad_cam/__pycache__/base_cam.cpython-39.pyc +0 -0
  28. pytorch_grad_cam/__pycache__/eigen_cam.cpython-39.pyc +0 -0
  29. pytorch_grad_cam/__pycache__/eigen_grad_cam.cpython-39.pyc +0 -0
  30. pytorch_grad_cam/__pycache__/fullgrad_cam.cpython-39.pyc +0 -0
  31. pytorch_grad_cam/__pycache__/grad_cam.cpython-39.pyc +0 -0
  32. pytorch_grad_cam/__pycache__/grad_cam_elementwise.cpython-39.pyc +0 -0
  33. pytorch_grad_cam/__pycache__/grad_cam_plusplus.cpython-39.pyc +0 -0
  34. pytorch_grad_cam/__pycache__/guided_backprop.cpython-39.pyc +0 -0
  35. pytorch_grad_cam/__pycache__/hirescam.cpython-39.pyc +0 -0
  36. pytorch_grad_cam/__pycache__/layer_cam.cpython-39.pyc +0 -0
  37. pytorch_grad_cam/__pycache__/random_cam.cpython-39.pyc +0 -0
  38. pytorch_grad_cam/__pycache__/score_cam.cpython-39.pyc +0 -0
  39. pytorch_grad_cam/__pycache__/xgrad_cam.cpython-39.pyc +0 -0
  40. pytorch_grad_cam/ablation_cam.py +148 -0
  41. pytorch_grad_cam/ablation_cam_multilayer.py +136 -0
  42. pytorch_grad_cam/ablation_layer.py +155 -0
  43. pytorch_grad_cam/activations_and_gradients.py +46 -0
  44. pytorch_grad_cam/base_cam.py +205 -0
  45. pytorch_grad_cam/cam_mult_image.py +37 -0
  46. pytorch_grad_cam/eigen_cam.py +23 -0
  47. pytorch_grad_cam/eigen_grad_cam.py +21 -0
  48. pytorch_grad_cam/feature_factorization/__init__.py +0 -0
  49. pytorch_grad_cam/feature_factorization/__pycache__/__init__.cpython-39.pyc +0 -0
  50. pytorch_grad_cam/feature_factorization/__pycache__/deep_feature_factorization.cpython-39.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ CAM-Result/gradcam_result.png filter=lfs diff=lfs merge=lfs -text
37
+ Test-Images/0d930f0a-46f813a9-db3b137b-05142eef-eca3c5a7.jpg filter=lfs diff=lfs merge=lfs -text
38
+ Test-Images/6ff741e9-6ea01eef-1bf10153-d1b6beba-590b6620.jpg filter=lfs diff=lfs merge=lfs -text
39
+ Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg filter=lfs diff=lfs merge=lfs -text
AURA-CXR-Logo.png ADDED
CAM-Result/gradcam_result.png ADDED

Git LFS Details

  • SHA256: a1b81c27fb575d2fbac2f18afd20b0e4e1ca81b08e4c1d4797ee6392c88fef48
  • Pointer size: 131 Bytes
  • Size of remote file: 212 kB
Chest_Xray_Report_Generator-Web-V2.py ADDED
@@ -0,0 +1,537 @@
1
+ import os
2
+ import transformers
3
+ from transformers import pipeline
4
+
5
+ ### Gradio
6
+ import gradio as gr
7
+ from gradio.themes.base import Base
8
+ from gradio.themes.utils import colors, fonts, sizes
9
+ from typing import Union, Iterable
10
+ import time
11
+ #####
12
+
13
+
14
+ import cv2
15
+ import numpy as np
16
+ import pydicom
17
+ import re
18
+
19
+ ##### Libraries For Grad-Cam-View
20
+ import os
21
+ import cv2
22
+ import numpy as np
23
+ import torch
24
+ from functools import partial
25
+ from torchvision import transforms
26
+ from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, EigenGradCAM, LayerCAM, FullGrad
27
+ from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image
28
+ from pytorch_grad_cam.ablation_layer import AblationLayerVit
29
+ from transformers import VisionEncoderDecoderModel
30
+
31
+
32
+ from transformers import AutoTokenizer
33
+ import transformers
34
+ import torch
35
+
36
+ from openai import OpenAI
37
+ client = OpenAI()
38
+
39
+ import spaces # Import the spaces module for ZeroGPU
40
+
41
+
42
+ @spaces.GPU
43
+ def generate_gradcam(image_path, model_path, output_path, method='gradcam', use_cuda=True, aug_smooth=False, eigen_smooth=False):
44
+ methods = {
45
+ "gradcam": GradCAM,
46
+ "scorecam": ScoreCAM,
47
+ "gradcam++": GradCAMPlusPlus,
48
+ "ablationcam": AblationCAM,
49
+ "xgradcam": XGradCAM,
50
+ "eigencam": EigenCAM,
51
+ "eigengradcam": EigenGradCAM,
52
+ "layercam": LayerCAM,
53
+ "fullgrad": FullGrad
54
+ }
55
+
56
+ if method not in methods:
57
+ raise ValueError(f"Method should be one of {list(methods.keys())}")
58
+
59
+ model = VisionEncoderDecoderModel.from_pretrained(model_path)
60
+ model.encoder.eval()
61
+
62
+ if use_cuda and torch.cuda.is_available():
63
+ model.encoder = model.encoder.cuda()
64
+ else:
65
+ use_cuda = False
66
+
67
+ #target_layers = [model.blocks[-1].norm1] ## For ViT model
68
+ #target_layers = model.blocks[-1].norm1 ## For EfficientNet-B7 model
69
+ #target_layers = [model.encoder.encoder.layer[-1].layernorm_before] ## For ViT-based VisionEncoderDecoder model
70
+ target_layers = [model.encoder.encoder.layers[-1].blocks[-0].layernorm_after, model.encoder.encoder.layers[-1].blocks[-1].layernorm_after] ## [model.encoder.encoder.layers[-1].blocks[-1].layernorm_before, model.encoder.encoder.layers[-1].blocks[0].layernorm_before] For Swin-based VisionEncoderDecoder model
71
+
72
+
73
+ if method == "ablationcam":
74
+ cam = methods[method](model=model.encoder,
75
+ target_layers=target_layers,
76
+ use_cuda=use_cuda,
77
+ reshape_transform=reshape_transform,
78
+ ablation_layer=AblationLayerVit())
79
+ else:
80
+ cam = methods[method](model=model.encoder,
81
+ target_layers=target_layers,
82
+ use_cuda=use_cuda,
83
+ reshape_transform=reshape_transform)
84
+
85
+ rgb_img = cv2.imread(image_path, 1)[:, :, ::-1]
86
+ rgb_img = cv2.resize(rgb_img, (384, 384)) ## (224, 224)
87
+ rgb_img = np.float32(rgb_img) / 255
88
+ input_tensor = preprocess_image(rgb_img, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
89
+
90
+ targets = None
91
+ cam.batch_size = 16
92
+
93
+ grayscale_cam = cam(input_tensor=input_tensor, targets=targets, eigen_smooth=eigen_smooth, aug_smooth=aug_smooth)
94
+ grayscale_cam = grayscale_cam[0, :]
95
+
96
+ cam_image = show_cam_on_image(rgb_img, grayscale_cam)
97
+ output_file = os.path.join(output_path, 'gradcam_result.png')
98
+ cv2.imwrite(output_file, cam_image)
99
+
100
+
101
+
102
+ def reshape_transform(tensor, height=12, width=12): ### height=14, width=14 for ViT-based Model
103
+ batch_size, token_number, embed_dim = tensor.size()
104
+ if token_number < height * width:
105
+ pad = torch.zeros(batch_size, height * width - token_number, embed_dim, device=tensor.device)
106
+ tensor = torch.cat([tensor, pad], dim=1)
107
+ elif token_number > height * width:
108
+ tensor = tensor[:, :height * width, :]
109
+
110
+ result = tensor.reshape(batch_size, height, width, embed_dim)
111
+ result = result.transpose(2, 3).transpose(1, 2)
112
+ return result
113
+
114
+
115
+ # Example usage:
116
+ #image_path = "/home/chayan/CGI_Net/images/images/CXR1353_IM-0230-1001.png"
117
+ model_path = "./Model/"
118
+ output_path = "./CAM-Result/"
119
+
120
+
121
+
122
+ def sentence_case(paragraph):
123
+ sentences = paragraph.split('. ')
124
+ formatted_sentences = [sentence.capitalize() for sentence in sentences if sentence]
125
+ formatted_paragraph = '. '.join(formatted_sentences)
126
+ return formatted_paragraph
127
+
128
+ def num2sym_bullets(text, bullet='-'):
129
+ """
130
+ Replaces '<num>.' bullet points with a specified symbol and formats the text as a bullet list.
131
+
132
+ Args:
133
+ text (str): Input text containing '<num>.' bullet points.
134
+ bullet (str): The symbol to replace '<num>.' with.
135
+
136
+ Returns:
137
+ str: Modified text with '<num>.' replaced and formatted as a bullet list.
138
+ """
139
+ sentences = re.split(r'<num>\.\s', text)
140
+ formatted_text = '\n'.join(f'{bullet} {sentence.strip()}' for sentence in sentences if sentence.strip())
141
+ return formatted_text
142
+
143
+ def is_cxr(image_path):
144
+ """
145
+ Checks if the uploaded image is a Chest X-ray using basic image processing.
146
+
147
+ Args:
148
+ image_path (str): Path to the uploaded image.
149
+
150
+ Returns:
151
+ bool: True if the image is likely a Chest X-ray, False otherwise.
152
+ """
153
+ try:
154
+
155
+ image = cv2.imread(image_path)
156
+
157
+ if image is None:
158
+ raise ValueError("Invalid image path.")
159
+
160
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
161
+ color_std = np.std(image, axis=2).mean()
162
+
163
+ if color_std > 0:
164
+ return False
165
+
166
+ return True
167
+
168
+ except Exception as e:
169
+ print(f"Error processing image: {e}")
170
+ return False
171
+
172
+ def dicom_to_png(dicom_file, png_file):
173
+ # Load DICOM file
174
+ dicom_data = pydicom.dcmread(dicom_file)
175
+ dicom_data.PhotometricInterpretation = 'MONOCHROME1'
176
+
177
+ # Normalize pixel values to 0-255
178
+ img = dicom_data.pixel_array
179
+ img = img.astype(np.float32)
180
+
181
+ img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX)
182
+ img = img.astype(np.uint8)
183
+
184
+ # Save as PNG
185
+ cv2.imwrite(png_file, img)
186
+ return img
187
+
188
+
189
+ Image_Captioner = pipeline("image-to-text", model = "./Model/", device = 0)
190
+
191
+ data_dir = "./CAM-Result"
192
+
193
+ @spaces.GPU(duration=300)
194
+ def xray_report_generator(Image_file, Query):
195
+ if Image_file[-4:] =='.dcm':
196
+ png_file = 'DCM2PNG.png'
197
+ dicom_to_png(Image_file, png_file)
198
+ Image_file = os.path.join(data_dir, png_file)
199
+ output = Image_Captioner(Image_file, max_new_tokens=512)
200
+
201
+ else:
202
+ output = Image_Captioner(Image_file, max_new_tokens=512)
203
+
204
+ result = output[0]['generated_text']
205
+ output_paragraph = sentence_case(result)
206
+
207
+ final_response = num2sym_bullets(output_paragraph, bullet='-')
208
+
209
+ query_prompt = f""" You are analyzing the doctor's query based on the patient's history and the generated chest X-ray report. Extract only the information relevant to the query.
210
+ If the report mentions the queried condition, write only the exact wording without any introduction. If the condition is not mentioned, respond with: 'No relevant findings related to [query condition].'.
211
+ """
212
+
213
+ #If the condition is negated, respond with: 'There is no [query condition].'.
214
+
215
+ completion = client.chat.completions.create(
216
+ model="gpt-4-turbo", ### gpt-4-turbo ### gpt-3.5-turbo-0125
217
+ messages=[
218
+ {"role": "system", "content": query_prompt},
219
+ {"role": "user", "content": f"Generated Report: {final_response}\nHistory/Doctor's Query: {Query}"}
220
+ ],
221
+ temperature=0.2)
222
+ query_response = completion.choices[0].message.content
223
+
224
+ generate_gradcam(Image_file, model_path, output_path, method='gradcam', use_cuda=True)
225
+
226
+ grad_cam_image = output_path + 'gradcam_result.png'
227
+
228
+ return grad_cam_image, final_response, query_response
229
+
230
+
231
+ # def save_feedback(feedback):
232
+ # feedback_dir = "Chayan/Feedback/" # Update this to your desired directory
233
+ # if not os.path.exists(feedback_dir):
234
+ # os.makedirs(feedback_dir)
235
+ # feedback_file = os.path.join(feedback_dir, "feedback.txt")
236
+ # with open(feedback_file, "a") as f:
237
+ # f.write(feedback + "\n")
238
+ # return "Feedback submitted successfully!"
239
+
240
+
241
+ def save_feedback(feedback):
242
+ feedback_dir = "Chayan/Feedback/" # Update this to your desired directory
243
+ if not os.path.exists(feedback_dir):
244
+ os.makedirs(feedback_dir)
245
+ feedback_file = os.path.join(feedback_dir, "feedback.txt")
246
+
247
+ try:
248
+ with open(feedback_file, "a") as f:
249
+ f.write(feedback + "\n")
250
+ print(f"Feedback saved at: {feedback_file}")
251
+ return "Feedback submitted successfully!"
252
+ except Exception as e:
253
+ print(f"Error saving feedback: {e}")
254
+ return "Failed to submit feedback!"
255
+
256
+
257
+ # Custom Theme Definition
258
+ class Seafoam(Base):
259
+ def __init__(
260
+ self,
261
+ *,
262
+ primary_hue: Union[colors.Color, str] = colors.emerald,
263
+ secondary_hue: Union[colors.Color, str] = colors.blue,
264
+ neutral_hue: Union[colors.Color, str] = colors.gray,
265
+ spacing_size: Union[sizes.Size, str] = sizes.spacing_md,
266
+ radius_size: Union[sizes.Size, str] = sizes.radius_md,
267
+ text_size: Union[sizes.Size, str] = sizes.text_lg,
268
+ font: Union[fonts.Font, str, Iterable[Union[fonts.Font, str]]] = (
269
+ fonts.GoogleFont("Quicksand"),
270
+ "ui-sans-serif",
271
+ "sans-serif",
272
+ ),
273
+ font_mono: Union[fonts.Font, str, Iterable[Union[fonts.Font, str]]] = (
274
+ fonts.GoogleFont("IBM Plex Mono"),
275
+ "ui-monospace",
276
+ "monospace",
277
+ ),
278
+ ):
279
+ super().__init__(
280
+ primary_hue=primary_hue,
281
+ secondary_hue=secondary_hue,
282
+ neutral_hue=neutral_hue,
283
+ spacing_size=spacing_size,
284
+ radius_size=radius_size,
285
+ text_size=text_size,
286
+ font=font,
287
+ font_mono=font_mono,
288
+ )
289
+
290
+ self.set(
291
+ body_background_fill="linear-gradient(114.2deg, rgba(184,215,21,1) -15.3%, rgba(21,215,98,1) 14.5%, rgba(21,215,182,1) 38.7%, rgba(129,189,240,1) 58.8%, rgba(219,108,205,1) 77.3%, rgba(240,129,129,1) 88.5%)"
292
+ )
293
+ # Initialize the theme
294
+ seafoam = Seafoam()
295
+
296
+
297
+
298
+ # Custom CSS styles
299
+ custom_css = """
300
+ <style>
301
+
302
+ /* Set background color for the entire Gradio app */
303
+ body, .gradio-container {
304
+ background-color: #f2f7f5 !important;
305
+ }
306
+
307
+ /* Optional: Add padding or margin for aesthetics */
308
+ .gradio-container {
309
+ padding: 20px;
310
+ }
311
+
312
+ #title {
313
+ color: green;
314
+ font-size: 36px;
315
+ font-weight: bold;
316
+ }
317
+ #description {
318
+ color: green;
319
+ font-size: 22px;
320
+ }
321
+
322
+ #title-row {
323
+ display: flex;
324
+ align-items: center;
325
+ gap: 10px;
326
+ margin-bottom: 0px;
327
+ }
328
+ #title-header h1 {
329
+ margin: 0;
330
+ }
331
+
332
+
333
+ #submit-btn {
334
+ background-color: #f5dec6; /* Banana leaf */
335
+ color: green;
336
+ padding: 15px 32px;
337
+ text-align: center;
338
+ text-decoration: none;
339
+ display: inline-block;
340
+ font-size: 30px;
341
+ margin: 4px 2px;
342
+ cursor: pointer;
343
+ }
344
+ #submit-btn:hover {
345
+ background-color: #00FFFF;
346
+ }
347
+
348
+
349
+ .intext textarea {
350
+ color: green;
351
+ font-size: 20px;
352
+ font-weight: bold;
353
+ }
354
+
355
+
356
+ .small-button {
357
+ color: green;
358
+ padding: 5px 10px;
359
+ font-size: 20px;
360
+ }
361
+
362
+ </style>
363
+ """
364
+
365
+ # Sample image paths
366
+ sample_images = [
367
+ "./Test-Images/0d930f0a-46f813a9-db3b137b-05142eef-eca3c5a7.jpg",
368
+ "./Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg",
369
+ "./Test-Images/6ff741e9-6ea01eef-1bf10153-d1b6beba-590b6620.jpg"
370
+ #"sample4.png",
371
+ #"sample5.png"
372
+ ]
373
+
374
+ def set_input_image(image_path):
375
+ return gr.update(value=image_path)
376
+
377
+ def show_contact_info():
378
+ yield gr.update(visible=True, value="""
379
+ **Contact Us:**
380
+ - Chayan Mondal
381
+ - Email: [email protected]
382
+ - Associate Prof. Sonny Pham
383
+ - Email: [email protected]
384
+ - Dr. Ashu Gupta
385
+ - Email: [email protected]
386
+ """)
387
+ # Wait for 20 seconds (you can adjust the time as needed)
388
+ time.sleep(20)
389
+ # Hide the content after 20 seconds
390
+ yield gr.update(visible=False)
391
+
392
+ def show_acknowledgment():
393
+ yield gr.update(visible=True, value="""
394
+ **Acknowledgment:**
395
+ This Research has been supported by the Western Australian Future Health Research and Innovation Fund.
396
+ """)
397
+ # Wait for 20 seconds
398
+ time.sleep(20)
399
+ # Hide the acknowledgment
400
+ yield gr.update(visible=False)
401
+
402
+
403
+ with gr.Blocks(theme=seafoam, css=custom_css) as demo:
404
+
405
+ #gr.HTML(custom_css) # Inject custom CSS
406
+
407
+
408
+ with gr.Row(elem_id="title-row"):
409
+ with gr.Column(scale=0):
410
+ gr.Image(
411
+ value="./AURA-CXR-Logo.png",
412
+ show_label=False,
413
+ width=60,
414
+ container=False
415
+ )
416
+ with gr.Column():
417
+ gr.Markdown(
418
+ """
419
+ <h1 style="color:blue; font-size: 32px; font-weight: bold; margin: 0;">
420
+ AURA-CXR: Explainable Diagnosis of Chest Diseases from X-rays
421
+ </h1>
422
+ """,
423
+ elem_id="title-header"
424
+ )
425
+
426
+ gr.Markdown(
427
+ "<p id='description'>Upload an X-ray image and get its report with heat-map visualization.</p>"
428
+ )
429
+
430
+
431
+
432
+ # gr.Markdown(
433
+ # """
434
+ # <h1 style="color:blue; font-size: 36px; font-weight: bold; margin: 0;">AURA-CXR: Explainable Diagnosis of Chest Diseases from X-rays</h1>
435
+ # <p id="description">Upload an X-ray image and get its report with heat-map visualization.</p>
436
+ # """
437
+ # )
438
+
439
+ #<h1 style="color:blue; font-size: 36px; font-weight: bold">AURA-CXR: Explainable Diagnosis of Chest Diseases from X-rays</h1>
440
+
441
+ with gr.Row():
442
+ inputs = gr.File(label="Upload Chest X-ray Image File", type="filepath")
443
+
444
+ with gr.Row():
445
+ with gr.Column(scale=1, min_width=300):
446
+ outputs1 = gr.Image(label="Image Viewer")
447
+ history_query = gr.Textbox(label="History/Doctor's Query", elem_classes="intext")
448
+ with gr.Column(scale=1, min_width=300):
449
+ outputs2 = gr.Image(label="Grad_CAM-Visualization")
450
+ with gr.Column(scale=1, min_width=300):
451
+ outputs3 = gr.Textbox(label="Generated Report", elem_classes = "intext")
452
+ outputs4 = gr.Textbox(label = "Query's Response", elem_classes = "intext")
453
+
454
+
455
+ submit_btn = gr.Button("Generate Report", elem_id="submit-btn", variant="primary")
456
+
457
+ def show_image(file_path):
458
+ if is_cxr(file_path): # Check if it's a valid Chest X-ray
459
+ return file_path, "Valid Image" # Show the image in Image Viewer
460
+ else:
461
+ return None, "Invalid image. Please upload a proper Chest X-ray."
462
+
463
+
464
+ # Show the uploaded image immediately in the Image Viewer
465
+ inputs.change(
466
+ fn=show_image, # Calls the function to return the same file path
467
+ inputs=inputs,
468
+ outputs=[outputs1, outputs3]
469
+ )
470
+
471
+
472
+
473
+
474
+ submit_btn.click(
475
+ fn=xray_report_generator,
476
+ inputs=[inputs,history_query],
477
+ outputs=[outputs2, outputs3, outputs4])
478
+
479
+
480
+ gr.Markdown(
481
+ """
482
+ <h2 style="color:green; font-size: 24px;">Or choose a sample image:</h2>
483
+ """
484
+ )
485
+
486
+ with gr.Row():
487
+ for idx, sample_image in enumerate(sample_images):
488
+ with gr.Column(scale=1):
489
+ #sample_image_component = gr.Image(value=sample_image, interactive=False)
490
+ select_button = gr.Button(f"Select Sample Image {idx+1}")
491
+ select_button.click(
492
+ fn=set_input_image,
493
+ inputs=gr.State(value=sample_image),
494
+ outputs=inputs
495
+ )
496
+
497
+
498
+ # Feedback section
499
+ gr.Markdown(
500
+ """
501
+ <h2 style="color:green; font-size: 24px;">Provide Your Valuable Feedback:</h2>
502
+ """
503
+ )
504
+
505
+ with gr.Row():
506
+ feedback_input = gr.Textbox(label="Your Feedback", lines=4, placeholder="Enter your feedback here...")
507
+ feedback_submit_btn = gr.Button("Submit Feedback", elem_classes="small-button", variant="secondary")
508
+ feedback_output = gr.Textbox(label="Feedback Status", interactive=False)
509
+
510
+
511
+
512
+ feedback_submit_btn.click(
513
+ fn=save_feedback,
514
+ inputs=feedback_input,
515
+ outputs=feedback_output
516
+ )
517
+
518
+
519
+ # Buttons and Markdown for Contact Us and Acknowledgment
520
+ with gr.Row():
521
+ contact_btn = gr.Button("Contact Us", elem_classes="small-button", variant="secondary")
522
+ ack_btn = gr.Button("Acknowledgment", elem_classes="small-button", variant="secondary")
523
+
524
+ contact_info = gr.Markdown(visible=False) # Initially hidden
525
+ acknowledgment_info = gr.Markdown(visible=False) # Initially hidden
526
+
527
+ # Update the content and make it visible when the buttons are clicked
528
+ contact_btn.click(fn=show_contact_info, outputs=contact_info, show_progress=False)
529
+ ack_btn.click(fn=show_acknowledgment, outputs=acknowledgment_info, show_progress=False)
530
+
531
+ # Update the content and make it visible when the buttons are clicked
532
+ # contact_btn.click(fn=show_contact_info, outputs=contact_info, show_progress=False)
533
+ # ack_btn.click(fn=show_acknowledgment, outputs=acknowledgment_info, show_progress=False)
534
+
535
+
536
+ demo.launch(share=True)
537
+
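The reshape_transform defined in this script is what lets the 2D CAM methods work on a transformer encoder: it converts the (batch, tokens, embed_dim) sequence coming out of the Swin stage into a (batch, embed_dim, height, width) feature map, padding or trimming the token dimension to fit the 12 x 12 grid. A minimal, self-contained sketch of that shape conversion follows; the 144-token, 1024-dimensional input is only an illustrative assumption, since the real sizes depend on the checkpoint in ./Model/.

import torch

def reshape_transform(tensor, height=12, width=12):
    # Same logic as in Chest_Xray_Report_Generator-Web-V2.py: pad or trim the token
    # dimension to height*width, then lay the tokens out on a 2D grid and move the
    # embedding dimension to the channel axis -> (batch, embed_dim, height, width).
    batch_size, token_number, embed_dim = tensor.size()
    if token_number < height * width:
        pad = torch.zeros(batch_size, height * width - token_number, embed_dim, device=tensor.device)
        tensor = torch.cat([tensor, pad], dim=1)
    elif token_number > height * width:
        tensor = tensor[:, :height * width, :]
    result = tensor.reshape(batch_size, height, width, embed_dim)
    return result.transpose(2, 3).transpose(1, 2)

tokens = torch.randn(1, 144, 1024)       # hypothetical Swin-stage output: 144 tokens, 1024-dim
print(reshape_transform(tokens).shape)   # torch.Size([1, 1024, 12, 12])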
Feedback/feedback.txt ADDED
File without changes
Model/config.json ADDED
The diff for this file is too large to render. See raw diff
 
Model/generation_config.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "bos_token_id": 50256,
3
+ "eos_token_id": 50256,
4
+ "max_length": 200,
5
+ "transformers_version": "4.37.1"
6
+ }
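These generation defaults (GPT-2's <|endoftext|> id 50256 as BOS/EOS, max_length 200) are picked up automatically when the checkpoint is loaded. A short sketch, assuming a recent transformers release that reads generation_config.json into model.generation_config; note that the web app above overrides the length at call time with max_new_tokens=512.

from transformers import VisionEncoderDecoderModel

model = VisionEncoderDecoderModel.from_pretrained("./Model/")
print(model.generation_config.max_length)    # 200
print(model.generation_config.eos_token_id)  # 50256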
Model/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb50b4debdf509c1f8c4dbbf344031528969f45426d358c62d39edcad08452ea
3
+ size 965957568
Model/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.485,
7
+ 0.456,
8
+ 0.406
9
+ ],
10
+ "feature_extractor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.229,
13
+ 0.224,
14
+ 0.225
15
+ ],
16
+ "resample": 3,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 384,
20
+ "width": 384
21
+ }
22
+ }
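This preprocessor configuration is what the image-to-text pipeline applies before the encoder: resize to 384 x 384, rescale by 1/255, then normalize with the ImageNet mean and std listed above. A small sketch of loading and running it directly, assuming a recent transformers release and using one of the test images from this commit; note that the Grad-CAM path in Chest_Xray_Report_Generator-Web-V2.py normalizes with mean/std 0.5 instead.

from transformers import AutoImageProcessor
from PIL import Image

processor = AutoImageProcessor.from_pretrained("./Model/")
img = Image.open("./Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg").convert("RGB")
inputs = processor(images=img, return_tensors="pt")
# Resized to 384x384, rescaled by 1/255, normalized with the ImageNet statistics above.
print(inputs["pixel_values"].shape)   # torch.Size([1, 3, 384, 384])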
Model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ed7612de6b8d4c06ccacb9ae48d72f25eaa405bb7d12ebc21c86121cca30197
3
+ size 14575
Model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f8ff02ac948318fd4b1db36c6dc3626126a027e501925cfc3bd76ac45c3505
3
+ size 627
Model/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
Model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Model/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
+ }
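The decoder side uses a GPT-2 tokenizer with <|endoftext|> (id 50256) serving as BOS, EOS, and padding token, matching generation_config.json above. A quick round-trip sketch, assuming transformers is installed; the sample sentence is only illustrative of the '<num>.' bullet markers the app later rewrites.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./Model/")
ids = tokenizer("<num>. the heart size is normal.", return_tensors="pt").input_ids
print(tokenizer.decode(ids[0], skip_special_tokens=True))   # round-trips the text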
Model/trainer_state.json ADDED
@@ -0,0 +1,536 @@
1
+ {
2
+ "best_metric": 0.0629316121339798,
3
+ "best_model_checkpoint": "./Swin-GPT2_Mimic/checkpoint-37500",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 37500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07,
13
+ "learning_rate": 4.9833333333333336e-05,
14
+ "loss": 0.1362,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.13,
19
+ "learning_rate": 4.966666666666667e-05,
20
+ "loss": 0.089,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.2,
25
+ "learning_rate": 4.9500000000000004e-05,
26
+ "loss": 0.0805,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.27,
31
+ "learning_rate": 4.933333333333334e-05,
32
+ "loss": 0.0779,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.33,
37
+ "learning_rate": 4.9166666666666665e-05,
38
+ "loss": 0.0775,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.4,
43
+ "learning_rate": 4.9e-05,
44
+ "loss": 0.0763,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.47,
49
+ "learning_rate": 4.883333333333334e-05,
50
+ "loss": 0.0749,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.53,
55
+ "learning_rate": 4.866666666666667e-05,
56
+ "loss": 0.0702,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.6,
61
+ "learning_rate": 4.85e-05,
62
+ "loss": 0.0701,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.67,
67
+ "learning_rate": 4.8333333333333334e-05,
68
+ "loss": 0.0715,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.73,
73
+ "learning_rate": 4.8166666666666674e-05,
74
+ "loss": 0.0725,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.8,
79
+ "learning_rate": 4.8e-05,
80
+ "loss": 0.0677,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.87,
85
+ "learning_rate": 4.7833333333333335e-05,
86
+ "loss": 0.0696,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.93,
91
+ "learning_rate": 4.766666666666667e-05,
92
+ "loss": 0.065,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 1.0,
97
+ "learning_rate": 4.75e-05,
98
+ "loss": 0.0646,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 1.0,
103
+ "eval_gen_len": 8.897,
104
+ "eval_loss": 0.06988305598497391,
105
+ "eval_rouge1": 34.7412,
106
+ "eval_rouge2": 25.6954,
107
+ "eval_rougeL": 34.4803,
108
+ "eval_rougeLsum": 34.7871,
109
+ "eval_runtime": 103.0848,
110
+ "eval_samples_per_second": 9.701,
111
+ "eval_steps_per_second": 1.213,
112
+ "step": 7500
113
+ },
114
+ {
115
+ "epoch": 1.07,
116
+ "learning_rate": 4.7333333333333336e-05,
117
+ "loss": 0.0651,
118
+ "step": 8000
119
+ },
120
+ {
121
+ "epoch": 1.13,
122
+ "learning_rate": 4.716666666666667e-05,
123
+ "loss": 0.0647,
124
+ "step": 8500
125
+ },
126
+ {
127
+ "epoch": 1.2,
128
+ "learning_rate": 4.7e-05,
129
+ "loss": 0.0644,
130
+ "step": 9000
131
+ },
132
+ {
133
+ "epoch": 1.27,
134
+ "learning_rate": 4.683333333333334e-05,
135
+ "loss": 0.0613,
136
+ "step": 9500
137
+ },
138
+ {
139
+ "epoch": 1.33,
140
+ "learning_rate": 4.666666666666667e-05,
141
+ "loss": 0.0664,
142
+ "step": 10000
143
+ },
144
+ {
145
+ "epoch": 1.4,
146
+ "learning_rate": 4.6500000000000005e-05,
147
+ "loss": 0.0631,
148
+ "step": 10500
149
+ },
150
+ {
151
+ "epoch": 1.47,
152
+ "learning_rate": 4.633333333333333e-05,
153
+ "loss": 0.0623,
154
+ "step": 11000
155
+ },
156
+ {
157
+ "epoch": 1.53,
158
+ "learning_rate": 4.6166666666666666e-05,
159
+ "loss": 0.0612,
160
+ "step": 11500
161
+ },
162
+ {
163
+ "epoch": 1.6,
164
+ "learning_rate": 4.600000000000001e-05,
165
+ "loss": 0.062,
166
+ "step": 12000
167
+ },
168
+ {
169
+ "epoch": 1.67,
170
+ "learning_rate": 4.5833333333333334e-05,
171
+ "loss": 0.0605,
172
+ "step": 12500
173
+ },
174
+ {
175
+ "epoch": 1.73,
176
+ "learning_rate": 4.566666666666667e-05,
177
+ "loss": 0.0619,
178
+ "step": 13000
179
+ },
180
+ {
181
+ "epoch": 1.8,
182
+ "learning_rate": 4.55e-05,
183
+ "loss": 0.062,
184
+ "step": 13500
185
+ },
186
+ {
187
+ "epoch": 1.87,
188
+ "learning_rate": 4.5333333333333335e-05,
189
+ "loss": 0.0622,
190
+ "step": 14000
191
+ },
192
+ {
193
+ "epoch": 1.93,
194
+ "learning_rate": 4.516666666666667e-05,
195
+ "loss": 0.06,
196
+ "step": 14500
197
+ },
198
+ {
199
+ "epoch": 2.0,
200
+ "learning_rate": 4.5e-05,
201
+ "loss": 0.0597,
202
+ "step": 15000
203
+ },
204
+ {
205
+ "epoch": 2.0,
206
+ "eval_gen_len": 14.724,
207
+ "eval_loss": 0.06516863405704498,
208
+ "eval_rouge1": 38.0809,
209
+ "eval_rouge2": 26.9533,
210
+ "eval_rougeL": 37.259,
211
+ "eval_rougeLsum": 37.8078,
212
+ "eval_runtime": 113.6453,
213
+ "eval_samples_per_second": 8.799,
214
+ "eval_steps_per_second": 1.1,
215
+ "step": 15000
216
+ },
217
+ {
218
+ "epoch": 2.07,
219
+ "learning_rate": 4.483333333333333e-05,
220
+ "loss": 0.0559,
221
+ "step": 15500
222
+ },
223
+ {
224
+ "epoch": 2.13,
225
+ "learning_rate": 4.466666666666667e-05,
226
+ "loss": 0.0595,
227
+ "step": 16000
228
+ },
229
+ {
230
+ "epoch": 2.2,
231
+ "learning_rate": 4.4500000000000004e-05,
232
+ "loss": 0.0569,
233
+ "step": 16500
234
+ },
235
+ {
236
+ "epoch": 2.27,
237
+ "learning_rate": 4.433333333333334e-05,
238
+ "loss": 0.0558,
239
+ "step": 17000
240
+ },
241
+ {
242
+ "epoch": 2.33,
243
+ "learning_rate": 4.4166666666666665e-05,
244
+ "loss": 0.0578,
245
+ "step": 17500
246
+ },
247
+ {
248
+ "epoch": 2.4,
249
+ "learning_rate": 4.4000000000000006e-05,
250
+ "loss": 0.0571,
251
+ "step": 18000
252
+ },
253
+ {
254
+ "epoch": 2.47,
255
+ "learning_rate": 4.383333333333334e-05,
256
+ "loss": 0.0586,
257
+ "step": 18500
258
+ },
259
+ {
260
+ "epoch": 2.53,
261
+ "learning_rate": 4.3666666666666666e-05,
262
+ "loss": 0.0577,
263
+ "step": 19000
264
+ },
265
+ {
266
+ "epoch": 2.6,
267
+ "learning_rate": 4.35e-05,
268
+ "loss": 0.0583,
269
+ "step": 19500
270
+ },
271
+ {
272
+ "epoch": 2.67,
273
+ "learning_rate": 4.3333333333333334e-05,
274
+ "loss": 0.0574,
275
+ "step": 20000
276
+ },
277
+ {
278
+ "epoch": 2.73,
279
+ "learning_rate": 4.316666666666667e-05,
280
+ "loss": 0.0563,
281
+ "step": 20500
282
+ },
283
+ {
284
+ "epoch": 2.8,
285
+ "learning_rate": 4.3e-05,
286
+ "loss": 0.057,
287
+ "step": 21000
288
+ },
289
+ {
290
+ "epoch": 2.87,
291
+ "learning_rate": 4.2833333333333335e-05,
292
+ "loss": 0.0559,
293
+ "step": 21500
294
+ },
295
+ {
296
+ "epoch": 2.93,
297
+ "learning_rate": 4.266666666666667e-05,
298
+ "loss": 0.0565,
299
+ "step": 22000
300
+ },
301
+ {
302
+ "epoch": 3.0,
303
+ "learning_rate": 4.25e-05,
304
+ "loss": 0.0577,
305
+ "step": 22500
306
+ },
307
+ {
308
+ "epoch": 3.0,
309
+ "eval_gen_len": 13.501,
310
+ "eval_loss": 0.06393314898014069,
311
+ "eval_rouge1": 37.8142,
312
+ "eval_rouge2": 26.9542,
313
+ "eval_rougeL": 37.076,
314
+ "eval_rougeLsum": 37.5874,
315
+ "eval_runtime": 112.3223,
316
+ "eval_samples_per_second": 8.903,
317
+ "eval_steps_per_second": 1.113,
318
+ "step": 22500
319
+ },
320
+ {
321
+ "epoch": 3.07,
322
+ "learning_rate": 4.233333333333334e-05,
323
+ "loss": 0.0511,
324
+ "step": 23000
325
+ },
326
+ {
327
+ "epoch": 3.13,
328
+ "learning_rate": 4.216666666666667e-05,
329
+ "loss": 0.0526,
330
+ "step": 23500
331
+ },
332
+ {
333
+ "epoch": 3.2,
334
+ "learning_rate": 4.2e-05,
335
+ "loss": 0.0514,
336
+ "step": 24000
337
+ },
338
+ {
339
+ "epoch": 3.27,
340
+ "learning_rate": 4.183333333333334e-05,
341
+ "loss": 0.053,
342
+ "step": 24500
343
+ },
344
+ {
345
+ "epoch": 3.33,
346
+ "learning_rate": 4.166666666666667e-05,
347
+ "loss": 0.0526,
348
+ "step": 25000
349
+ },
350
+ {
351
+ "epoch": 3.4,
352
+ "learning_rate": 4.15e-05,
353
+ "loss": 0.0542,
354
+ "step": 25500
355
+ },
356
+ {
357
+ "epoch": 3.47,
358
+ "learning_rate": 4.133333333333333e-05,
359
+ "loss": 0.0533,
360
+ "step": 26000
361
+ },
362
+ {
363
+ "epoch": 3.53,
364
+ "learning_rate": 4.116666666666667e-05,
365
+ "loss": 0.0537,
366
+ "step": 26500
367
+ },
368
+ {
369
+ "epoch": 3.6,
370
+ "learning_rate": 4.1e-05,
371
+ "loss": 0.0519,
372
+ "step": 27000
373
+ },
374
+ {
375
+ "epoch": 3.67,
376
+ "learning_rate": 4.0833333333333334e-05,
377
+ "loss": 0.0532,
378
+ "step": 27500
379
+ },
380
+ {
381
+ "epoch": 3.73,
382
+ "learning_rate": 4.066666666666667e-05,
383
+ "loss": 0.0538,
384
+ "step": 28000
385
+ },
386
+ {
387
+ "epoch": 3.8,
388
+ "learning_rate": 4.05e-05,
389
+ "loss": 0.0533,
390
+ "step": 28500
391
+ },
392
+ {
393
+ "epoch": 3.87,
394
+ "learning_rate": 4.0333333333333336e-05,
395
+ "loss": 0.0544,
396
+ "step": 29000
397
+ },
398
+ {
399
+ "epoch": 3.93,
400
+ "learning_rate": 4.016666666666667e-05,
401
+ "loss": 0.0536,
402
+ "step": 29500
403
+ },
404
+ {
405
+ "epoch": 4.0,
406
+ "learning_rate": 4e-05,
407
+ "loss": 0.0528,
408
+ "step": 30000
409
+ },
410
+ {
411
+ "epoch": 4.0,
412
+ "eval_gen_len": 11.784,
413
+ "eval_loss": 0.06298327445983887,
414
+ "eval_rouge1": 37.8876,
415
+ "eval_rouge2": 26.9586,
416
+ "eval_rougeL": 37.2585,
417
+ "eval_rougeLsum": 37.7378,
418
+ "eval_runtime": 109.3283,
419
+ "eval_samples_per_second": 9.147,
420
+ "eval_steps_per_second": 1.143,
421
+ "step": 30000
422
+ },
423
+ {
424
+ "epoch": 4.07,
425
+ "learning_rate": 3.983333333333333e-05,
426
+ "loss": 0.0488,
427
+ "step": 30500
428
+ },
429
+ {
430
+ "epoch": 4.13,
431
+ "learning_rate": 3.966666666666667e-05,
432
+ "loss": 0.0475,
433
+ "step": 31000
434
+ },
435
+ {
436
+ "epoch": 4.2,
437
+ "learning_rate": 3.9500000000000005e-05,
438
+ "loss": 0.0487,
439
+ "step": 31500
440
+ },
441
+ {
442
+ "epoch": 4.27,
443
+ "learning_rate": 3.933333333333333e-05,
444
+ "loss": 0.0493,
445
+ "step": 32000
446
+ },
447
+ {
448
+ "epoch": 4.33,
449
+ "learning_rate": 3.9166666666666665e-05,
450
+ "loss": 0.0482,
451
+ "step": 32500
452
+ },
453
+ {
454
+ "epoch": 4.4,
455
+ "learning_rate": 3.9000000000000006e-05,
456
+ "loss": 0.0504,
457
+ "step": 33000
458
+ },
459
+ {
460
+ "epoch": 4.47,
461
+ "learning_rate": 3.883333333333333e-05,
462
+ "loss": 0.0495,
463
+ "step": 33500
464
+ },
465
+ {
466
+ "epoch": 4.53,
467
+ "learning_rate": 3.866666666666667e-05,
468
+ "loss": 0.0477,
469
+ "step": 34000
470
+ },
471
+ {
472
+ "epoch": 4.6,
473
+ "learning_rate": 3.85e-05,
474
+ "loss": 0.049,
475
+ "step": 34500
476
+ },
477
+ {
478
+ "epoch": 4.67,
479
+ "learning_rate": 3.8333333333333334e-05,
480
+ "loss": 0.0483,
481
+ "step": 35000
482
+ },
483
+ {
484
+ "epoch": 4.73,
485
+ "learning_rate": 3.816666666666667e-05,
486
+ "loss": 0.0509,
487
+ "step": 35500
488
+ },
489
+ {
490
+ "epoch": 4.8,
491
+ "learning_rate": 3.8e-05,
492
+ "loss": 0.0505,
493
+ "step": 36000
494
+ },
495
+ {
496
+ "epoch": 4.87,
497
+ "learning_rate": 3.7833333333333336e-05,
498
+ "loss": 0.0506,
499
+ "step": 36500
500
+ },
501
+ {
502
+ "epoch": 4.93,
503
+ "learning_rate": 3.766666666666667e-05,
504
+ "loss": 0.049,
505
+ "step": 37000
506
+ },
507
+ {
508
+ "epoch": 5.0,
509
+ "learning_rate": 3.7500000000000003e-05,
510
+ "loss": 0.0485,
511
+ "step": 37500
512
+ },
513
+ {
514
+ "epoch": 5.0,
515
+ "eval_gen_len": 14.157,
516
+ "eval_loss": 0.0629316121339798,
517
+ "eval_rouge1": 39.0822,
518
+ "eval_rouge2": 27.4073,
519
+ "eval_rougeL": 38.1885,
520
+ "eval_rougeLsum": 38.8776,
521
+ "eval_runtime": 112.3853,
522
+ "eval_samples_per_second": 8.898,
523
+ "eval_steps_per_second": 1.112,
524
+ "step": 37500
525
+ }
526
+ ],
527
+ "logging_steps": 500,
528
+ "max_steps": 150000,
529
+ "num_input_tokens_seen": 0,
530
+ "num_train_epochs": 20,
531
+ "save_steps": 500,
532
+ "total_flos": 1.601193167290368e+20,
533
+ "train_batch_size": 8,
534
+ "trial_name": null,
535
+ "trial_params": null
536
+ }
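trainer_state.json logs a training row every 500 steps and one evaluation row per epoch; the shipped checkpoint is the best one so far (eval_loss 0.0629 at step 37500, epoch 5 of a planned 20). A short sketch for pulling out the per-epoch evaluation metrics, using only the standard library:

import json

with open("./Model/trainer_state.json") as f:
    state = json.load(f)

# Evaluation rows carry eval_* keys; training rows only carry loss/learning_rate.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"epoch {entry['epoch']:.0f}: eval_loss={entry['eval_loss']:.4f}, "
              f"rougeL={entry['eval_rougeL']}, gen_len={entry['eval_gen_len']}")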
Model/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dd77040949647fa4b081f2f6be19d1ed5b3019d92fd8ecb74d288af93cd6290
3
+ size 4411
Model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
Test-Images/0d930f0a-46f813a9-db3b137b-05142eef-eca3c5a7.jpg ADDED

Git LFS Details

  • SHA256: f80804e8e2532e0bb3665c1790380728b67eb79f0fc4d6c0b9163d2596ea5ff3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.93 MB
Test-Images/6ff741e9-6ea01eef-1bf10153-d1b6beba-590b6620.jpg ADDED

Git LFS Details

  • SHA256: 7c1aa5b4227347d97ace457c47d05f4df8aadc2df32b48ffac0dd4fe625f59ac
  • Pointer size: 132 Bytes
  • Size of remote file: 1.77 MB
Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg ADDED

Git LFS Details

  • SHA256: 4c133343b1322cd385660c297657f864e0c1e905147088e8c514caa0225b978e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.74 MB
pytorch_grad_cam/Readme.md ADDED
@@ -0,0 +1,29 @@
1
+ #### Grad-CAM visualization of any VisionEncoderDecoder model
2
+
3
+ # Step 1: Open the /pytorch_grad_cam folder and make sure that __init__.py imports every CAM variant by its class name, not by its Python file name. For example
4
+ from pytorch_grad_cam.grad_cam import GradCAM
5
+ because the main script (Grad_CAM_Visualization.py) imports every class directly.
6
+
7
+ # Step 2: Open the main Grad-CAM script, Grad_CAM_Visualization.py, and edit the following function to match your model.
8
+ # "def reshape_transform(tensor, height=14, width=14):
9
+ result = tensor[:, 1:, :].reshape(tensor.size(0),
10
+ height, width, tensor.size(2))
11
+ result = result.transpose(2, 3).transpose(1, 2)
12
+ # return result"
13
+ Here the resized image tensor has 150,528 elements, which must match the reshaped size of [1, 14, 14, 768].
14
+ ## If there is a mismatch, the error message looks like this:
15
+ RuntimeError: shape '[1, 16, 16, 768]' is invalid for input of size 150528
16
+
17
+ # Step 3: Choose your desired model from (DeiT_Base16 pretrained on ImageNet, customized VisionTransformer, DINO_Base16 pretrained on ImageNet, my customized DeiT-CXR model, my customized EfficientNet model, and the ##VisionEncoderDecoder model)
18
+
19
+ # Step 4: Open the base_cam.py file and go to the "forward" function of class BaseCAM.
20
+ Add the extra line "outputs = outputs.pooler_output" for the ##VisionEncoderDecoder model, because we need the pooler_output tensor from the model's output. Follow the comment line as well.
21
+
22
+ # Step 5: Then follow the comments in Grad_CAM_Visualization.py:
23
+ use model.encoder instead of model for the ## VisionEncoderDecoder model
24
+ use different target_layers for different models
25
+ target_layers = [model.encoder.encoder.layer[-1].layernorm_before] for the ## VisionEncoderDecoder model
26
+
27
+ # Step 6: Change the image_path and output_path accordingly
28
+
29
+ # Step 7: Run python Grad_CAM_Visualization.py --use-cuda --image-path "directory/image_path" --method "any grad-cam method defined in the code"
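As a companion to Step 2, here is the ViT-style reshape_transform from the Readme as a runnable sketch, so the size arithmetic is easy to check: 1 x 14 x 14 x 768 = 150,528 elements, which is why the 16 x 16 grid in the example error message cannot hold the same tensor. It assumes PyTorch and a standard ViT-Base/16 token layout with one CLS token.

import torch

def reshape_transform(tensor, height=14, width=14):
    # Drop the CLS token, lay the remaining 196 patch tokens out on a 14x14 grid,
    # then move the 768-dim embedding to the channel axis -> (batch, 768, 14, 14).
    result = tensor[:, 1:, :].reshape(tensor.size(0), height, width, tensor.size(2))
    return result.transpose(2, 3).transpose(1, 2)

tokens = torch.randn(1, 197, 768)         # 1 CLS token + 196 patch tokens
print(reshape_transform(tokens).shape)    # torch.Size([1, 768, 14, 14])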
pytorch_grad_cam/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ from pytorch_grad_cam.grad_cam import GradCAM
2
+ from pytorch_grad_cam.hirescam import HiResCAM
3
+ from pytorch_grad_cam.grad_cam_elementwise import GradCAMElementWise
4
+ from pytorch_grad_cam.ablation_layer import AblationLayer, AblationLayerVit, AblationLayerFasterRCNN
5
+ from pytorch_grad_cam.ablation_cam import AblationCAM
6
+ from pytorch_grad_cam.xgrad_cam import XGradCAM
7
+ from pytorch_grad_cam.grad_cam_plusplus import GradCAMPlusPlus
8
+ from pytorch_grad_cam.score_cam import ScoreCAM
9
+ from pytorch_grad_cam.layer_cam import LayerCAM
10
+ from pytorch_grad_cam.eigen_cam import EigenCAM
11
+ from pytorch_grad_cam.eigen_grad_cam import EigenGradCAM
12
+ from pytorch_grad_cam.random_cam import RandomCAM
13
+ from pytorch_grad_cam.fullgrad_cam import FullGrad
14
+ from pytorch_grad_cam.guided_backprop import GuidedBackpropReLUModel
15
+ from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
16
+ from pytorch_grad_cam.feature_factorization.deep_feature_factorization import DeepFeatureFactorization, run_dff_on_image
17
+ import pytorch_grad_cam.utils.model_targets
18
+ import pytorch_grad_cam.utils.reshape_transforms
19
+ import pytorch_grad_cam.metrics.cam_mult_image
20
+ import pytorch_grad_cam.metrics.road
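For orientation, this is the minimal way the exported classes are typically used upstream; a hedged sketch on a small torchvision CNN so no reshape_transform is needed. It assumes torchvision is installed and that the vendored base_cam.py still handles plain classifier logits (the Readme above notes a pooler_output tweak that is specific to VisionEncoderDecoder encoders).

import numpy as np
import torch
from torchvision.models import resnet18
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image

model = resnet18(weights=None).eval()
cam = GradCAM(model=model, target_layers=[model.layer4[-1]])

input_tensor = torch.randn(1, 3, 224, 224)                            # stand-in for a preprocessed image
grayscale_cam = cam(input_tensor=input_tensor, targets=None)[0, :]    # (224, 224) heatmap in [0, 1]

rgb_img = np.random.rand(224, 224, 3).astype(np.float32)              # stand-in for the float image in [0, 1]
overlay = show_cam_on_image(rgb_img, grayscale_cam)                   # uint8 overlay, same call the app uses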
pytorch_grad_cam/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.57 kB). View file
 
pytorch_grad_cam/__pycache__/ablation_cam.cpython-39.pyc ADDED
Binary file (3.7 kB). View file
 
pytorch_grad_cam/__pycache__/ablation_layer.cpython-39.pyc ADDED
Binary file (5.25 kB). View file
 
pytorch_grad_cam/__pycache__/activations_and_gradients.cpython-39.pyc ADDED
Binary file (1.9 kB). View file
 
pytorch_grad_cam/__pycache__/base_cam.cpython-39.pyc ADDED
Binary file (5.86 kB). View file
 
pytorch_grad_cam/__pycache__/eigen_cam.cpython-39.pyc ADDED
Binary file (948 Bytes). View file
 
pytorch_grad_cam/__pycache__/eigen_grad_cam.cpython-39.pyc ADDED
Binary file (942 Bytes). View file
 
pytorch_grad_cam/__pycache__/fullgrad_cam.cpython-39.pyc ADDED
Binary file (3.2 kB). View file
 
pytorch_grad_cam/__pycache__/grad_cam.cpython-39.pyc ADDED
Binary file (889 Bytes). View file
 
pytorch_grad_cam/__pycache__/grad_cam_elementwise.cpython-39.pyc ADDED
Binary file (1.11 kB). View file
 
pytorch_grad_cam/__pycache__/grad_cam_plusplus.cpython-39.pyc ADDED
Binary file (1.14 kB). View file
 
pytorch_grad_cam/__pycache__/guided_backprop.cpython-39.pyc ADDED
Binary file (3.43 kB). View file
 
pytorch_grad_cam/__pycache__/hirescam.cpython-39.pyc ADDED
Binary file (1.14 kB). View file
 
pytorch_grad_cam/__pycache__/layer_cam.cpython-39.pyc ADDED
Binary file (1.07 kB). View file
 
pytorch_grad_cam/__pycache__/random_cam.cpython-39.pyc ADDED
Binary file (938 Bytes). View file
 
pytorch_grad_cam/__pycache__/score_cam.cpython-39.pyc ADDED
Binary file (1.97 kB). View file
 
pytorch_grad_cam/__pycache__/xgrad_cam.cpython-39.pyc ADDED
Binary file (1 kB). View file
 
pytorch_grad_cam/ablation_cam.py ADDED
@@ -0,0 +1,148 @@
1
+ import numpy as np
2
+ import torch
3
+ import tqdm
4
+ from typing import Callable, List
5
+ from pytorch_grad_cam.base_cam import BaseCAM
6
+ from pytorch_grad_cam.utils.find_layers import replace_layer_recursive
7
+ from pytorch_grad_cam.ablation_layer import AblationLayer
8
+
9
+
10
+ """ Implementation of AblationCAM
11
+ https://openaccess.thecvf.com/content_WACV_2020/papers/Desai_Ablation-CAM_Visual_Explanations_for_Deep_Convolutional_Network_via_Gradient-free_Localization_WACV_2020_paper.pdf
12
+
13
+ Ablate individual activations, and then measure the drop in the target score.
14
+
15
+ In the current implementation, the target layer activations are cached, so they won't be re-computed.
16
+ However, layers before it, if any, will not be cached.
17
+ This means that if the target layer is a large block, for example model.features (in VGG), there will
18
+ be a large saving in run time.
19
+
20
+ Since we have to go over many channels and ablate them, and every channel ablation requires a forward pass,
21
+ it would be nice if we could avoid doing that for channels that won't contribute anyway, making it much faster.
22
+ The parameter ratio_channels_to_ablate controls how many channels should be ablated, using an experimental method
23
+ (to be improved). The default 1.0 value means that all channels will be ablated.
24
+ """
25
+
26
+
27
+ class AblationCAM(BaseCAM):
28
+ def __init__(self,
29
+ model: torch.nn.Module,
30
+ target_layers: List[torch.nn.Module],
31
+ use_cuda: bool = False,
32
+ reshape_transform: Callable = None,
33
+ ablation_layer: torch.nn.Module = AblationLayer(),
34
+ batch_size: int = 32,
35
+ ratio_channels_to_ablate: float = 1.0) -> None:
36
+
37
+ super(AblationCAM, self).__init__(model,
38
+ target_layers,
39
+ use_cuda,
40
+ reshape_transform,
41
+ uses_gradients=False)
42
+ self.batch_size = batch_size
43
+ self.ablation_layer = ablation_layer
44
+ self.ratio_channels_to_ablate = ratio_channels_to_ablate
45
+
46
+ def save_activation(self, module, input, output) -> None:
47
+ """ Helper function to save the raw activations from the target layer """
48
+ self.activations = output
49
+
50
+ def assemble_ablation_scores(self,
51
+ new_scores: list,
52
+ original_score: float,
53
+ ablated_channels: np.ndarray,
54
+ number_of_channels: int) -> np.ndarray:
55
+ """ Take the value from the channels that were ablated,
56
+ and just set the original score for the channels that were skipped """
57
+
58
+ index = 0
59
+ result = []
60
+ sorted_indices = np.argsort(ablated_channels)
61
+ ablated_channels = ablated_channels[sorted_indices]
62
+ new_scores = np.float32(new_scores)[sorted_indices]
63
+
64
+ for i in range(number_of_channels):
65
+ if index < len(ablated_channels) and ablated_channels[index] == i:
66
+ weight = new_scores[index]
67
+ index = index + 1
68
+ else:
69
+ weight = original_score
70
+ result.append(weight)
71
+
72
+ return result
73
+
74
+ def get_cam_weights(self,
75
+ input_tensor: torch.Tensor,
76
+ target_layer: torch.nn.Module,
77
+ targets: List[Callable],
78
+ activations: torch.Tensor,
79
+ grads: torch.Tensor) -> np.ndarray:
80
+
81
+ # Do a forward pass, compute the target scores, and cache the
82
+ # activations
83
+ handle = target_layer.register_forward_hook(self.save_activation)
84
+ with torch.no_grad():
85
+ outputs = self.model(input_tensor)
86
+ handle.remove()
87
+ original_scores = np.float32(
88
+ [target(output).cpu().item() for target, output in zip(targets, outputs)])
89
+
90
+ # Replace the layer with the ablation layer.
91
+ # When we finish, we will replace it back, so the original model is
92
+ # unchanged.
93
+ ablation_layer = self.ablation_layer
94
+ replace_layer_recursive(self.model, target_layer, ablation_layer)
95
+
96
+ number_of_channels = activations.shape[1]
97
+ weights = []
98
+ # This is a "gradient free" method, so we don't need gradients here.
99
+ with torch.no_grad():
100
+ # Loop over each of the batch images and ablate activations for it.
101
+ for batch_index, (target, tensor) in enumerate(
102
+ zip(targets, input_tensor)):
103
+ new_scores = []
104
+ batch_tensor = tensor.repeat(self.batch_size, 1, 1, 1)
105
+
106
+ # Check which channels should be ablated. Normally this will be all channels,
107
+ # But we can also try to speed this up by using a low
108
+ # ratio_channels_to_ablate.
109
+ channels_to_ablate = ablation_layer.activations_to_be_ablated(
110
+ activations[batch_index, :], self.ratio_channels_to_ablate)
111
+ number_channels_to_ablate = len(channels_to_ablate)
112
+
113
+ for i in tqdm.tqdm(
114
+ range(
115
+ 0,
116
+ number_channels_to_ablate,
117
+ self.batch_size)):
118
+ if i + self.batch_size > number_channels_to_ablate:
119
+ batch_tensor = batch_tensor[:(
120
+ number_channels_to_ablate - i)]
121
+
122
+ # Change the state of the ablation layer so it ablates the next channels.
123
+ # TBD: Move this into the ablation layer forward pass.
124
+ ablation_layer.set_next_batch(
125
+ input_batch_index=batch_index,
126
+ activations=self.activations,
127
+ num_channels_to_ablate=batch_tensor.size(0))
128
+ score = [target(o).cpu().item()
129
+ for o in self.model(batch_tensor)]
130
+ new_scores.extend(score)
131
+ ablation_layer.indices = ablation_layer.indices[batch_tensor.size(
132
+ 0):]
133
+
134
+ new_scores = self.assemble_ablation_scores(
135
+ new_scores,
136
+ original_scores[batch_index],
137
+ channels_to_ablate,
138
+ number_of_channels)
139
+ weights.extend(new_scores)
140
+
141
+ weights = np.float32(weights)
142
+ weights = weights.reshape(activations.shape[:2])
143
+ original_scores = original_scores[:, None]
144
+ weights = (original_scores - weights) / original_scores
145
+
146
+ # Replace the model back to the original state
147
+ replace_layer_recursive(self.model, ablation_layer, target_layer)
148
+ return weights
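A hedged construction sketch for the class above, showing the knobs its docstring describes; torchvision's resnet18 is only an illustrative stand-in, and for the ViT/Swin case the web app instead passes AblationLayerVit() plus a reshape_transform.

import torch
from torchvision.models import resnet18
from pytorch_grad_cam import AblationCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

model = resnet18(weights=None).eval()
cam = AblationCAM(model=model,
                  target_layers=[model.layer4[-1]],
                  batch_size=32,                      # channels ablated per forward pass
                  ratio_channels_to_ablate=0.5)       # ablate only the most promising half of the channels

input_tensor = torch.randn(1, 3, 224, 224)
targets = [ClassifierOutputTarget(0)]                 # measure the score drop for class index 0
grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]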
pytorch_grad_cam/ablation_cam_multilayer.py ADDED
@@ -0,0 +1,136 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import tqdm
5
+ from pytorch_grad_cam.base_cam import BaseCAM
6
+
7
+
8
+ class AblationLayer(torch.nn.Module):
9
+ def __init__(self, layer, reshape_transform, indices):
10
+ super(AblationLayer, self).__init__()
11
+
12
+ self.layer = layer
13
+ self.reshape_transform = reshape_transform
14
+ # The channels to zero out:
15
+ self.indices = indices
16
+
17
+ def forward(self, x):
18
+ self.__call__(x)
19
+
20
+ def __call__(self, x):
21
+ output = self.layer(x)
22
+
23
+ # Hack to work with ViT,
24
+ # Since the activation channels are last and not first like in CNNs
25
+ # Probably should remove it?
26
+ if self.reshape_transform is not None:
27
+ output = output.transpose(1, 2)
28
+
29
+ for i in range(output.size(0)):
30
+
31
+ # Commonly the minimum activation will be 0,
32
+ # And then it makes sense to zero it out.
33
+ # However depending on the architecture,
34
+ # If the values can be negative, we use very negative values
35
+ # to perform the ablation, deviating from the paper.
36
+ if torch.min(output) == 0:
37
+ output[i, self.indices[i], :] = 0
38
+ else:
39
+ ABLATION_VALUE = 1e5
40
+ output[i, self.indices[i], :] = torch.min(
41
+ output) - ABLATION_VALUE
42
+
43
+ if self.reshape_transform is not None:
44
+ output = output.transpose(2, 1)
45
+
46
+ return output
47
+
48
+
49
+ def replace_layer_recursive(model, old_layer, new_layer):
50
+ for name, layer in model._modules.items():
51
+ if layer == old_layer:
52
+ model._modules[name] = new_layer
53
+ return True
54
+ elif replace_layer_recursive(layer, old_layer, new_layer):
55
+ return True
56
+ return False
57
+
58
+
59
+ class AblationCAM(BaseCAM):
60
+ def __init__(self, model, target_layers, use_cuda=False,
61
+ reshape_transform=None):
62
+ super(AblationCAM, self).__init__(model, target_layers, use_cuda,
63
+ reshape_transform)
64
+
65
+ if len(target_layers) > 1:
66
+ print(
67
+ "Warning. You are usign Ablation CAM with more than 1 layers. "
68
+ "This is supported only if all layers have the same output shape")
69
+
70
+ def set_ablation_layers(self):
71
+ self.ablation_layers = []
72
+ for target_layer in self.target_layers:
73
+ ablation_layer = AblationLayer(target_layer,
74
+ self.reshape_transform, indices=[])
75
+ self.ablation_layers.append(ablation_layer)
76
+ replace_layer_recursive(self.model, target_layer, ablation_layer)
77
+
78
+ def unset_ablation_layers(self):
79
+ # replace the model back to the original state
80
+ for ablation_layer, target_layer in zip(
81
+ self.ablation_layers, self.target_layers):
82
+ replace_layer_recursive(self.model, ablation_layer, target_layer)
83
+
84
+ def set_ablation_layer_batch_indices(self, indices):
85
+ for ablation_layer in self.ablation_layers:
86
+ ablation_layer.indices = indices
87
+
88
+ def trim_ablation_layer_batch_indices(self, keep):
89
+ for ablation_layer in self.ablation_layers:
90
+ ablation_layer.indices = ablation_layer.indices[:keep]
91
+
92
+ def get_cam_weights(self,
93
+ input_tensor,
94
+ target_category,
95
+ activations,
96
+ grads):
97
+ with torch.no_grad():
98
+ outputs = self.model(input_tensor).cpu().numpy()
99
+ original_scores = []
100
+ for i in range(input_tensor.size(0)):
101
+ original_scores.append(outputs[i, target_category[i]])
102
+ original_scores = np.float32(original_scores)
103
+
104
+ self.set_ablation_layers()
105
+
106
+ if hasattr(self, "batch_size"):
107
+ BATCH_SIZE = self.batch_size
108
+ else:
109
+ BATCH_SIZE = 32
110
+
111
+ number_of_channels = activations.shape[1]
112
+ weights = []
113
+
114
+ with torch.no_grad():
115
+ # Iterate over the input batch
116
+ for tensor, category in zip(input_tensor, target_category):
117
+ batch_tensor = tensor.repeat(BATCH_SIZE, 1, 1, 1)
118
+ for i in tqdm.tqdm(range(0, number_of_channels, BATCH_SIZE)):
119
+ self.set_ablation_layer_batch_indices(
120
+ list(range(i, i + BATCH_SIZE)))
121
+
122
+ if i + BATCH_SIZE > number_of_channels:
123
+ keep = number_of_channels - i
124
+ batch_tensor = batch_tensor[:keep]
125
+ self.trim_ablation_layer_batch_indices(keep)
126
+ score = self.model(batch_tensor)[:, category].cpu().numpy()
127
+ weights.extend(score)
128
+
129
+ weights = np.float32(weights)
130
+ weights = weights.reshape(activations.shape[:2])
131
+ original_scores = original_scores[:, None]
132
+ weights = (original_scores - weights) / original_scores
133
+
134
+ # replace the model back to the original state
135
+ self.unset_ablation_layers()
136
+ return weights
pytorch_grad_cam/ablation_layer.py ADDED
@@ -0,0 +1,155 @@
+ import torch
+ from collections import OrderedDict
+ import numpy as np
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+
+
+ class AblationLayer(torch.nn.Module):
+     def __init__(self):
+         super(AblationLayer, self).__init__()
+
+     def objectiveness_mask_from_svd(self, activations, threshold=0.01):
+         """ Experimental method to get a binary mask to decide if the activation is worth ablating.
+             The idea is to apply the EigenCAM method by doing PCA on the activations.
+             Then we create a binary mask by comparing to a low threshold.
+             Areas that are masked out are probably not interesting anyway.
+         """
+         projection = get_2d_projection(activations[None, :])[0, :]
+         projection = np.abs(projection)
+         projection = projection - projection.min()
+         projection = projection / projection.max()
+         projection = projection > threshold
+         return projection
+
+     def activations_to_be_ablated(
+             self,
+             activations,
+             ratio_channels_to_ablate=1.0):
+         """ Experimental method to decide which activation channels are worth ablating.
+             Create a binary CAM mask with objectiveness_mask_from_svd.
+             Score each activation channel by how much of its values fall inside the mask.
+             Then keep the top channels.
+         """
+         if ratio_channels_to_ablate == 1.0:
+             self.indices = np.int32(range(activations.shape[0]))
+             return self.indices
+
+         projection = self.objectiveness_mask_from_svd(activations)
+
+         scores = []
+         for channel in activations:
+             normalized = np.abs(channel)
+             normalized = normalized - normalized.min()
+             normalized = normalized / np.max(normalized)
+             score = (projection * normalized).sum() / normalized.sum()
+             scores.append(score)
+         scores = np.float32(scores)
+
+         indices = list(np.argsort(scores))
+         high_score_indices = indices[::-1][: int(len(indices) * ratio_channels_to_ablate)]
+         low_score_indices = indices[: int(len(indices) * ratio_channels_to_ablate)]
+         self.indices = np.int32(high_score_indices + low_score_indices)
+         return self.indices
+
+     def set_next_batch(
+             self,
+             input_batch_index,
+             activations,
+             num_channels_to_ablate):
+         """ This creates the next batch of activations from the layer.
+             Just take the corresponding batch member from activations, and repeat it num_channels_to_ablate times.
+         """
+         self.activations = activations[input_batch_index, :, :, :].clone(
+         ).unsqueeze(0).repeat(num_channels_to_ablate, 1, 1, 1)
+
+     def __call__(self, x):
+         output = self.activations
+         for i in range(output.size(0)):
+             # Commonly the minimum activation will be 0,
+             # and then it makes sense to zero it out.
+             # However, depending on the architecture,
+             # if the values can be negative we use a very negative value
+             # to perform the ablation, deviating from the paper.
+             if torch.min(output) == 0:
+                 output[i, self.indices[i], :] = 0
+             else:
+                 ABLATION_VALUE = 1e7
+                 output[i, self.indices[i], :] = torch.min(
+                     output) - ABLATION_VALUE
+
+         return output
+
+
+ class AblationLayerVit(AblationLayer):
+     def __init__(self):
+         super(AblationLayerVit, self).__init__()
+
+     def __call__(self, x):
+         output = self.activations
+         output = output.transpose(1, len(output.shape) - 1)
+         for i in range(output.size(0)):
+             # Commonly the minimum activation will be 0,
+             # and then it makes sense to zero it out.
+             # However, depending on the architecture,
+             # if the values can be negative we use a very negative value
+             # to perform the ablation, deviating from the paper.
+             if torch.min(output) == 0:
+                 output[i, self.indices[i], :] = 0
+             else:
+                 ABLATION_VALUE = 1e7
+                 output[i, self.indices[i], :] = torch.min(
+                     output) - ABLATION_VALUE
+
+         output = output.transpose(len(output.shape) - 1, 1)
+
+         return output
+
+     def set_next_batch(
+             self,
+             input_batch_index,
+             activations,
+             num_channels_to_ablate):
+         """ This creates the next batch of activations from the layer.
+             Just take the corresponding batch member from activations, and repeat it num_channels_to_ablate times.
+         """
+         repeat_params = [num_channels_to_ablate] + \
+             len(activations.shape[:-1]) * [1]
+         self.activations = activations[input_batch_index, :, :].clone(
+         ).unsqueeze(0).repeat(*repeat_params)
+
+
+ class AblationLayerFasterRCNN(AblationLayer):
+     def __init__(self):
+         super(AblationLayerFasterRCNN, self).__init__()
+
+     def set_next_batch(
+             self,
+             input_batch_index,
+             activations,
+             num_channels_to_ablate):
+         """ Extract the next batch member from activations,
+             and repeat it num_channels_to_ablate times.
+         """
+         self.activations = OrderedDict()
+         for key, value in activations.items():
+             fpn_activation = value[input_batch_index,
+                                    :, :, :].clone().unsqueeze(0)
+             self.activations[key] = fpn_activation.repeat(
+                 num_channels_to_ablate, 1, 1, 1)
+
+     def __call__(self, x):
+         result = self.activations
+         layers = {0: '0', 1: '1', 2: '2', 3: '3', 4: 'pool'}
+         num_channels_to_ablate = result['pool'].size(0)
+         for i in range(num_channels_to_ablate):
+             pyramid_layer = int(self.indices[i] / 256)
+             index_in_pyramid_layer = int(self.indices[i] % 256)
+             result[layers[pyramid_layer]][i,
+                                           index_in_pyramid_layer, :, :] = -1000
+         return result
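A quick check (illustrative, not from the committed files) of how AblationLayer behaves in the common post-ReLU case where the minimum activation is 0, so ablated channels are zeroed out; the channel indices below are arbitrary.

    # Minimal sketch: repeat one sample 3 times and ablate one channel per repeat.
    import torch
    from pytorch_grad_cam.ablation_layer import AblationLayer

    layer = AblationLayer()
    activations = torch.relu(torch.randn(2, 8, 7, 7))          # batch of 2, 8 channels, min is 0
    layer.set_next_batch(input_batch_index=0, activations=activations,
                         num_channels_to_ablate=3)              # sample 0 repeated 3 times
    layer.indices = [1, 4, 6]                                   # channel to ablate in each repeat
    out = layer(None)                                           # input is ignored; stored activations are returned
    print(out.shape, float(out[0, 1].abs().sum()))              # torch.Size([3, 8, 7, 7]) 0.0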
pytorch_grad_cam/activations_and_gradients.py ADDED
@@ -0,0 +1,46 @@
+ class ActivationsAndGradients:
+     """ Class for extracting activations and
+     registering gradients from targeted intermediate layers """
+
+     def __init__(self, model, target_layers, reshape_transform):
+         self.model = model
+         self.gradients = []
+         self.activations = []
+         self.reshape_transform = reshape_transform
+         self.handles = []
+         for target_layer in target_layers:
+             self.handles.append(
+                 target_layer.register_forward_hook(self.save_activation))
+             # Because of https://github.com/pytorch/pytorch/issues/61519,
+             # we don't use a backward hook to record gradients.
+             self.handles.append(
+                 target_layer.register_forward_hook(self.save_gradient))
+
+     def save_activation(self, module, input, output):
+         activation = output
+
+         if self.reshape_transform is not None:
+             activation = self.reshape_transform(activation)
+         self.activations.append(activation.cpu().detach())
+
+     def save_gradient(self, module, input, output):
+         if not hasattr(output, "requires_grad") or not output.requires_grad:
+             # You can only register hooks on tensors that require grad.
+             return
+
+         # Gradients are computed in reverse order
+         def _store_grad(grad):
+             if self.reshape_transform is not None:
+                 grad = self.reshape_transform(grad)
+             self.gradients = [grad.cpu().detach()] + self.gradients
+
+         output.register_hook(_store_grad)
+
+     def __call__(self, x):
+         self.gradients = []
+         self.activations = []
+         return self.model(x)
+
+     def release(self):
+         for handle in self.handles:
+             handle.remove()
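A minimal sketch (not part of this commit) of how ActivationsAndGradients is meant to be used: wrap a model, run a forward pass, backpropagate a scalar, then read the captured activations and gradients. The toy CNN below is purely illustrative.

    import torch
    from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients

    model = torch.nn.Sequential(
        torch.nn.Conv2d(3, 4, 3, padding=1),
        torch.nn.ReLU(),
        torch.nn.AdaptiveAvgPool2d(1),
        torch.nn.Flatten(),
        torch.nn.Linear(4, 2),
    )
    wrapper = ActivationsAndGradients(model, target_layers=[model[0]], reshape_transform=None)
    out = wrapper(torch.randn(1, 3, 8, 8))          # forward hooks capture the conv activations
    out[:, 0].sum().backward()                      # tensor hook captures the conv gradients
    print(wrapper.activations[0].shape, wrapper.gradients[0].shape)   # both (1, 4, 8, 8)
    wrapper.release()                               # remove the hooks when done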
pytorch_grad_cam/base_cam.py ADDED
@@ -0,0 +1,205 @@
+ import numpy as np
+ import torch
+ import ttach as tta
+ from typing import Callable, List, Tuple
+ from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+ from pytorch_grad_cam.utils.image import scale_cam_image
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+
+
+ class BaseCAM:
+     def __init__(self,
+                  model: torch.nn.Module,
+                  target_layers: List[torch.nn.Module],
+                  use_cuda: bool = False,
+                  reshape_transform: Callable = None,
+                  compute_input_gradient: bool = False,
+                  uses_gradients: bool = True) -> None:
+         self.model = model.eval()
+         self.target_layers = target_layers
+         self.cuda = use_cuda
+         if self.cuda:
+             self.model = model.cuda()
+         self.reshape_transform = reshape_transform
+         self.compute_input_gradient = compute_input_gradient
+         self.uses_gradients = uses_gradients
+         self.activations_and_grads = ActivationsAndGradients(
+             self.model, target_layers, reshape_transform)
+
+     """ Get a vector of weights for every channel in the target layer.
+         Methods that return channel weights
+         will typically need to only implement this function. """
+
+     def get_cam_weights(self,
+                         input_tensor: torch.Tensor,
+                         target_layers: List[torch.nn.Module],
+                         targets: List[torch.nn.Module],
+                         activations: torch.Tensor,
+                         grads: torch.Tensor) -> np.ndarray:
+         raise Exception("Not Implemented")
+
+     def get_cam_image(self,
+                       input_tensor: torch.Tensor,
+                       target_layer: torch.nn.Module,
+                       targets: List[torch.nn.Module],
+                       activations: torch.Tensor,
+                       grads: torch.Tensor,
+                       eigen_smooth: bool = False) -> np.ndarray:
+
+         weights = self.get_cam_weights(input_tensor,
+                                        target_layer,
+                                        targets,
+                                        activations,
+                                        grads)
+         weighted_activations = weights[:, :, None, None] * activations
+         if eigen_smooth:
+             cam = get_2d_projection(weighted_activations)
+         else:
+             cam = weighted_activations.sum(axis=1)
+         return cam
+
+     def forward(self,
+                 input_tensor: torch.Tensor,
+                 targets: List[torch.nn.Module],
+                 eigen_smooth: bool = False) -> np.ndarray:
+
+         if self.cuda:
+             input_tensor = input_tensor.cuda()
+
+         if self.compute_input_gradient:
+             input_tensor = torch.autograd.Variable(input_tensor,
+                                                    requires_grad=True)
+
+         outputs = self.activations_and_grads(input_tensor)
+         # Only for ViT-GPT2 or any other VisionEncoderDecoder model:
+         # use the pooled encoder output as the score vector.
+         outputs = outputs.pooler_output
+         if targets is None:
+             target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1)
+             targets = [ClassifierOutputTarget(
+                 category) for category in target_categories]
+
+         if self.uses_gradients:
+             self.model.zero_grad()
+             loss = sum([target(output)
+                         for target, output in zip(targets, outputs)])
+             loss.backward(retain_graph=True)
+
+         # In most of the saliency attribution papers, the saliency is
+         # computed with a single target layer.
+         # Commonly it is the last convolutional layer.
+         # Here we support passing a list with multiple target layers.
+         # It will compute the saliency image for every layer,
+         # and then aggregate them (with a default mean aggregation).
+         # This gives you more flexibility in case you just want to
+         # use all conv layers, for example, or all Batchnorm layers,
+         # or something else.
+         cam_per_layer = self.compute_cam_per_layer(input_tensor,
+                                                    targets,
+                                                    eigen_smooth)
+         return self.aggregate_multi_layers(cam_per_layer)
+
+     def get_target_width_height(self,
+                                 input_tensor: torch.Tensor) -> Tuple[int, int]:
+         width, height = input_tensor.size(-1), input_tensor.size(-2)
+         return width, height
+
+     def compute_cam_per_layer(
+             self,
+             input_tensor: torch.Tensor,
+             targets: List[torch.nn.Module],
+             eigen_smooth: bool) -> np.ndarray:
+         activations_list = [a.cpu().data.numpy()
+                             for a in self.activations_and_grads.activations]
+         grads_list = [g.cpu().data.numpy()
+                       for g in self.activations_and_grads.gradients]
+         target_size = self.get_target_width_height(input_tensor)
+
+         cam_per_target_layer = []
+         # Loop over the saliency image from every layer
+         for i in range(len(self.target_layers)):
+             target_layer = self.target_layers[i]
+             layer_activations = None
+             layer_grads = None
+             if i < len(activations_list):
+                 layer_activations = activations_list[i]
+             if i < len(grads_list):
+                 layer_grads = grads_list[i]
+
+             cam = self.get_cam_image(input_tensor,
+                                      target_layer,
+                                      targets,
+                                      layer_activations,
+                                      layer_grads,
+                                      eigen_smooth)
+             cam = np.maximum(cam, 0)
+             scaled = scale_cam_image(cam, target_size)
+             cam_per_target_layer.append(scaled[:, None, :])
+
+         return cam_per_target_layer
+
+     def aggregate_multi_layers(
+             self,
+             cam_per_target_layer: np.ndarray) -> np.ndarray:
+         cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
+         cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
+         result = np.mean(cam_per_target_layer, axis=1)
+         return scale_cam_image(result)
+
+     def forward_augmentation_smoothing(self,
+                                        input_tensor: torch.Tensor,
+                                        targets: List[torch.nn.Module],
+                                        eigen_smooth: bool = False) -> np.ndarray:
+         transforms = tta.Compose(
+             [
+                 tta.HorizontalFlip(),
+                 tta.Multiply(factors=[0.9, 1, 1.1]),
+             ]
+         )
+         cams = []
+         for transform in transforms:
+             augmented_tensor = transform.augment_image(input_tensor)
+             cam = self.forward(augmented_tensor,
+                                targets,
+                                eigen_smooth)
+
+             # The ttach library expects a tensor of size BxCxHxW
+             cam = cam[:, None, :, :]
+             cam = torch.from_numpy(cam)
+             cam = transform.deaugment_mask(cam)
+
+             # Back to numpy float32, HxW
+             cam = cam.numpy()
+             cam = cam[:, 0, :, :]
+             cams.append(cam)
+
+         cam = np.mean(np.float32(cams), axis=0)
+         return cam
+
+     def __call__(self,
+                  input_tensor: torch.Tensor,
+                  targets: List[torch.nn.Module] = None,
+                  aug_smooth: bool = False,
+                  eigen_smooth: bool = False) -> np.ndarray:
+
+         # Smooth the CAM result with test time augmentation
+         if aug_smooth is True:
+             return self.forward_augmentation_smoothing(
+                 input_tensor, targets, eigen_smooth)
+
+         return self.forward(input_tensor,
+                             targets, eigen_smooth)
+
+     def __del__(self):
+         self.activations_and_grads.release()
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, exc_tb):
+         self.activations_and_grads.release()
+         if isinstance(exc_value, IndexError):
+             # Handle IndexError here...
+             print(
+                 f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}")
+             return True
+
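The pooler_output line above is this commit's VisionEncoderDecoder-specific change: forward() assumes the wrapped model returns a Hugging Face style output that exposes a pooled vector. A hedged sketch of how a BaseCAM subclass such as GradCAM could be driven against a plain ViT encoder under that assumption; the checkpoint name, target layer, and 14x14 token grid are illustrative, and GradCAM is assumed to be exported by the package __init__ as in the upstream library.

    import torch
    from transformers import ViTModel
    from pytorch_grad_cam import GradCAM

    vit = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")

    def reshape_transform(tensor, height=14, width=14):
        # Drop the CLS token and fold the patch tokens back into a 2D grid (B, C, H, W).
        result = tensor[:, 1:, :].reshape(tensor.size(0), height, width, tensor.size(2))
        return result.transpose(2, 3).transpose(1, 2)

    cam = GradCAM(model=vit,
                  target_layers=[vit.encoder.layer[-1].layernorm_before],
                  reshape_transform=reshape_transform)
    grayscale_cam = cam(input_tensor=torch.randn(1, 3, 224, 224))   # stand-in for pixel_values
    print(grayscale_cam.shape)                                      # (1, 224, 224)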
pytorch_grad_cam/cam_mult_image.py ADDED
@@ -0,0 +1,37 @@
+ import torch
+ import numpy as np
+ from typing import List, Callable
+ from pytorch_grad_cam.metrics.perturbation_confidence import PerturbationConfidenceMetric
+
+
+ def multiply_tensor_with_cam(input_tensor: torch.Tensor,
+                              cam: torch.Tensor):
+     """ Multiply an input tensor (after normalization)
+         with a pixel attribution map
+     """
+     return input_tensor * cam
+
+
+ class CamMultImageConfidenceChange(PerturbationConfidenceMetric):
+     def __init__(self):
+         super(CamMultImageConfidenceChange,
+               self).__init__(multiply_tensor_with_cam)
+
+
+ class DropInConfidence(CamMultImageConfidenceChange):
+     def __init__(self):
+         super(DropInConfidence, self).__init__()
+
+     def __call__(self, *args, **kwargs):
+         scores = super(DropInConfidence, self).__call__(*args, **kwargs)
+         scores = -scores
+         return np.maximum(scores, 0)
+
+
+ class IncreaseInConfidence(CamMultImageConfidenceChange):
+     def __init__(self):
+         super(IncreaseInConfidence, self).__init__()
+
+     def __call__(self, *args, **kwargs):
+         scores = super(IncreaseInConfidence, self).__call__(*args, **kwargs)
+         return np.float32(scores > 0)
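A toy simplification (not from the committed files) of the confidence-change logic: treating the base metric's output as "score after multiplying the image by the CAM, minus the original score", DropInConfidence keeps only positive drops and IncreaseInConfidence flags increases.

    import numpy as np

    original = np.float32([0.90, 0.40, 0.75])
    after_cam_mult = np.float32([0.60, 0.45, 0.75])
    change = after_cam_mult - original          # simplified stand-in for the base metric's scores
    drop = np.maximum(-change, 0)               # DropInConfidence
    increase = np.float32(change > 0)           # IncreaseInConfidence
    print(drop, increase)                       # [0.3 0.  0. ] [0. 1. 0.]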
pytorch_grad_cam/eigen_cam.py ADDED
@@ -0,0 +1,23 @@
+ from pytorch_grad_cam.base_cam import BaseCAM
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+
+ # https://arxiv.org/abs/2008.00299
+
+
+ class EigenCAM(BaseCAM):
+     def __init__(self, model, target_layers, use_cuda=False,
+                  reshape_transform=None):
+         super(EigenCAM, self).__init__(model,
+                                        target_layers,
+                                        use_cuda,
+                                        reshape_transform,
+                                        uses_gradients=False)
+
+     def get_cam_image(self,
+                       input_tensor,
+                       target_layer,
+                       target_category,
+                       activations,
+                       grads,
+                       eigen_smooth):
+         return get_2d_projection(activations)
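For intuition, a rough standalone equivalent (illustrative only) of what get_2d_projection computes for one image: reshape the activations to (H*W, C), center them, and project onto the first right singular vector to get an H x W map.

    import numpy as np

    activations = np.random.rand(1, 64, 7, 7).astype(np.float32)
    a = activations[0].reshape(64, -1).transpose()          # (H*W, C)
    a = a - a.mean(axis=0)                                  # center before the SVD
    _, _, vt = np.linalg.svd(a, full_matrices=True)
    projection = (a @ vt[0]).reshape(7, 7)                  # first principal component per location
    print(projection.shape)                                 # (7, 7)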
pytorch_grad_cam/eigen_grad_cam.py ADDED
@@ -0,0 +1,21 @@
+ from pytorch_grad_cam.base_cam import BaseCAM
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+
+ # Like EigenCAM: https://arxiv.org/abs/2008.00299
+ # But multiply the activations x gradients
+
+
+ class EigenGradCAM(BaseCAM):
+     def __init__(self, model, target_layers, use_cuda=False,
+                  reshape_transform=None):
+         super(EigenGradCAM, self).__init__(model, target_layers, use_cuda,
+                                            reshape_transform)
+
+     def get_cam_image(self,
+                       input_tensor,
+                       target_layer,
+                       target_category,
+                       activations,
+                       grads,
+                       eigen_smooth):
+         return get_2d_projection(grads * activations)
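A side-by-side sketch (illustrative, not part of the commit) of the only difference from EigenCAM: the projection runs on grads * activations instead of the raw activations, so channels are weighted by how strongly the target score reacts to them.

    import numpy as np
    from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection

    activations = np.random.rand(1, 64, 7, 7).astype(np.float32)
    grads = np.random.rand(1, 64, 7, 7).astype(np.float32)
    eigen_cam_map = get_2d_projection(activations)              # what EigenCAM.get_cam_image returns
    eigen_grad_cam_map = get_2d_projection(grads * activations) # what EigenGradCAM.get_cam_image returns
    print(eigen_cam_map.shape, eigen_grad_cam_map.shape)        # (1, 7, 7) (1, 7, 7)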
pytorch_grad_cam/feature_factorization/__init__.py ADDED
File without changes
pytorch_grad_cam/feature_factorization/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (180 Bytes). View file
 
pytorch_grad_cam/feature_factorization/__pycache__/deep_feature_factorization.cpython-39.pyc ADDED
Binary file (4.75 kB). View file