ChayanDeb committed · commit 88d7988 · verified · 1 parent: c013eea

Upload 82 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +4 -0
  2. AURA-CXR-Logo.png +0 -0
  3. CAM-Result/gradcam_result.png +3 -0
  4. Chest_Xray_Report_Generator-Web-V2.py +537 -0
  5. Feedback/feedback.txt +0 -0
  6. Model/config.json +0 -0
  7. Model/generation_config.json +6 -0
  8. Model/model.safetensors +3 -0
  9. Model/preprocessor_config.json +22 -0
  10. Model/rng_state.pth +3 -0
  11. Model/scheduler.pt +3 -0
  12. Model/special_tokens_map.json +6 -0
  13. Model/tokenizer.json +0 -0
  14. Model/tokenizer_config.json +20 -0
  15. Model/trainer_state.json +536 -0
  16. Model/training_args.bin +3 -0
  17. Model/vocab.json +0 -0
  18. Test-Images/0d930f0a-46f813a9-db3b137b-05142eef-eca3c5a7.jpg +3 -0
  19. Test-Images/6ff741e9-6ea01eef-1bf10153-d1b6beba-590b6620.jpg +3 -0
  20. Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg +3 -0
  21. pytorch_grad_cam/Readme.md +29 -0
  22. pytorch_grad_cam/__init__.py +20 -0
  23. pytorch_grad_cam/__pycache__/__init__.cpython-39.pyc +0 -0
  24. pytorch_grad_cam/__pycache__/ablation_cam.cpython-39.pyc +0 -0
  25. pytorch_grad_cam/__pycache__/ablation_layer.cpython-39.pyc +0 -0
  26. pytorch_grad_cam/__pycache__/activations_and_gradients.cpython-39.pyc +0 -0
  27. pytorch_grad_cam/__pycache__/base_cam.cpython-39.pyc +0 -0
  28. pytorch_grad_cam/__pycache__/eigen_cam.cpython-39.pyc +0 -0
  29. pytorch_grad_cam/__pycache__/eigen_grad_cam.cpython-39.pyc +0 -0
  30. pytorch_grad_cam/__pycache__/fullgrad_cam.cpython-39.pyc +0 -0
  31. pytorch_grad_cam/__pycache__/grad_cam.cpython-39.pyc +0 -0
  32. pytorch_grad_cam/__pycache__/grad_cam_elementwise.cpython-39.pyc +0 -0
  33. pytorch_grad_cam/__pycache__/grad_cam_plusplus.cpython-39.pyc +0 -0
  34. pytorch_grad_cam/__pycache__/guided_backprop.cpython-39.pyc +0 -0
  35. pytorch_grad_cam/__pycache__/hirescam.cpython-39.pyc +0 -0
  36. pytorch_grad_cam/__pycache__/layer_cam.cpython-39.pyc +0 -0
  37. pytorch_grad_cam/__pycache__/random_cam.cpython-39.pyc +0 -0
  38. pytorch_grad_cam/__pycache__/score_cam.cpython-39.pyc +0 -0
  39. pytorch_grad_cam/__pycache__/xgrad_cam.cpython-39.pyc +0 -0
  40. pytorch_grad_cam/ablation_cam.py +148 -0
  41. pytorch_grad_cam/ablation_cam_multilayer.py +136 -0
  42. pytorch_grad_cam/ablation_layer.py +155 -0
  43. pytorch_grad_cam/activations_and_gradients.py +46 -0
  44. pytorch_grad_cam/base_cam.py +205 -0
  45. pytorch_grad_cam/cam_mult_image.py +37 -0
  46. pytorch_grad_cam/eigen_cam.py +23 -0
  47. pytorch_grad_cam/eigen_grad_cam.py +21 -0
  48. pytorch_grad_cam/feature_factorization/__init__.py +0 -0
  49. pytorch_grad_cam/feature_factorization/__pycache__/__init__.cpython-39.pyc +0 -0
  50. pytorch_grad_cam/feature_factorization/__pycache__/deep_feature_factorization.cpython-39.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ CAM-Result/gradcam_result.png filter=lfs diff=lfs merge=lfs -text
37
+ Test-Images/0d930f0a-46f813a9-db3b137b-05142eef-eca3c5a7.jpg filter=lfs diff=lfs merge=lfs -text
38
+ Test-Images/6ff741e9-6ea01eef-1bf10153-d1b6beba-590b6620.jpg filter=lfs diff=lfs merge=lfs -text
39
+ Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg filter=lfs diff=lfs merge=lfs -text
AURA-CXR-Logo.png ADDED
CAM-Result/gradcam_result.png ADDED

Git LFS Details

  • SHA256: a1b81c27fb575d2fbac2f18afd20b0e4e1ca81b08e4c1d4797ee6392c88fef48
  • Pointer size: 131 Bytes
  • Size of remote file: 212 kB
Chest_Xray_Report_Generator-Web-V2.py ADDED
@@ -0,0 +1,537 @@
1
+ import os
2
+ import transformers
3
+ from transformers import pipeline
4
+
5
+ ### Gradio
6
+ import gradio as gr
7
+ from gradio.themes.base import Base
8
+ from gradio.themes.utils import colors, fonts, sizes
9
+ from typing import Union, Iterable
10
+ import time
11
+ #####
12
+
13
+
14
+ import cv2
15
+ import numpy as np
16
+ import pydicom
17
+ import re
18
+
19
+ ##### Libraries For Grad-Cam-View
20
+ import os
21
+ import cv2
22
+ import numpy as np
23
+ import torch
24
+ from functools import partial
25
+ from torchvision import transforms
26
+ from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, EigenGradCAM, LayerCAM, FullGrad
27
+ from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image
28
+ from pytorch_grad_cam.ablation_layer import AblationLayerVit
29
+ from transformers import VisionEncoderDecoderModel
30
+
31
+
32
+ from transformers import AutoTokenizer
33
+ import transformers
34
+ import torch
35
+
36
+ from openai import OpenAI
37
+ client = OpenAI()
38
+
39
+ import spaces # Import the spaces module for ZeroGPU
40
+
41
+
42
+ @spaces.GPU
43
+ def generate_gradcam(image_path, model_path, output_path, method='gradcam', use_cuda=True, aug_smooth=False, eigen_smooth=False):
44
+ methods = {
45
+ "gradcam": GradCAM,
46
+ "scorecam": ScoreCAM,
47
+ "gradcam++": GradCAMPlusPlus,
48
+ "ablationcam": AblationCAM,
49
+ "xgradcam": XGradCAM,
50
+ "eigencam": EigenCAM,
51
+ "eigengradcam": EigenGradCAM,
52
+ "layercam": LayerCAM,
53
+ "fullgrad": FullGrad
54
+ }
55
+
56
+ if method not in methods:
57
+ raise ValueError(f"Method should be one of {list(methods.keys())}")
58
+
59
+ model = VisionEncoderDecoderModel.from_pretrained(model_path)
60
+ model.encoder.eval()
61
+
62
+ if use_cuda and torch.cuda.is_available():
63
+ model.encoder = model.encoder.cuda()
64
+ else:
65
+ use_cuda = False
66
+
67
+ #target_layers = [model.blocks[-1].norm1] ## For ViT model
68
+ #target_layers = model.blocks[-1].norm1 ## For EfficientNet-B7 model
69
+ #target_layers = [model.encoder.encoder.layer[-1].layernorm_before] ## For ViT-based VisionEncoderDecoder model
70
+ target_layers = [model.encoder.encoder.layers[-1].blocks[-0].layernorm_after, model.encoder.encoder.layers[-1].blocks[-1].layernorm_after] ## [model.encoder.encoder.layers[-1].blocks[-1].layernorm_before, model.encoder.encoder.layers[-1].blocks[0].layernorm_before] For Swin-based VisionEncoderDecoder model
71
+
72
+
73
+ if method == "ablationcam":
74
+ cam = methods[method](model=model.encoder,
75
+ target_layers=target_layers,
76
+ use_cuda=use_cuda,
77
+ reshape_transform=reshape_transform,
78
+ ablation_layer=AblationLayerVit())
79
+ else:
80
+ cam = methods[method](model=model.encoder,
81
+ target_layers=target_layers,
82
+ use_cuda=use_cuda,
83
+ reshape_transform=reshape_transform)
84
+
85
+ rgb_img = cv2.imread(image_path, 1)[:, :, ::-1]
86
+ rgb_img = cv2.resize(rgb_img, (384, 384)) ## (224, 224)
87
+ rgb_img = np.float32(rgb_img) / 255
88
+ input_tensor = preprocess_image(rgb_img, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
89
+
90
+ targets = None
91
+ cam.batch_size = 16
92
+
93
+ grayscale_cam = cam(input_tensor=input_tensor, targets=targets, eigen_smooth=eigen_smooth, aug_smooth=aug_smooth)
94
+ grayscale_cam = grayscale_cam[0, :]
95
+
96
+ cam_image = show_cam_on_image(rgb_img, grayscale_cam)
97
+ output_file = os.path.join(output_path, 'gradcam_result.png')
98
+ cv2.imwrite(output_file, cam_image)
99
+
100
+
101
+
102
+ def reshape_transform(tensor, height=12, width=12): ### height=14, width=14 for ViT-based Model
103
+ batch_size, token_number, embed_dim = tensor.size()
104
+ if token_number < height * width:
105
+ pad = torch.zeros(batch_size, height * width - token_number, embed_dim, device=tensor.device)
106
+ tensor = torch.cat([tensor, pad], dim=1)
107
+ elif token_number > height * width:
108
+ tensor = tensor[:, :height * width, :]
109
+
110
+ result = tensor.reshape(batch_size, height, width, embed_dim)
111
+ result = result.transpose(2, 3).transpose(1, 2)
112
+ return result
113
+
114
+
115
+ # Example usage:
116
+ #image_path = "/home/chayan/CGI_Net/images/images/CXR1353_IM-0230-1001.png"
117
+ model_path = "./Model/"
118
+ output_path = "./CAM-Result/"
119
+
120
+
121
+
122
+ def sentence_case(paragraph):
123
+ sentences = paragraph.split('. ')
124
+ formatted_sentences = [sentence.capitalize() for sentence in sentences if sentence]
125
+ formatted_paragraph = '. '.join(formatted_sentences)
126
+ return formatted_paragraph
127
+
128
+ def num2sym_bullets(text, bullet='-'):
129
+ """
130
+ Replaces '<num>.' bullet points with a specified symbol and formats the text as a bullet list.
131
+
132
+ Args:
133
+ text (str): Input text containing '<num>.' bullet points.
134
+ bullet (str): The symbol to replace '<num>.' with.
135
+
136
+ Returns:
137
+ str: Modified text with '<num>.' replaced and formatted as a bullet list.
138
+ """
139
+ sentences = re.split(r'<num>\.\s', text)
140
+ formatted_text = '\n'.join(f'{bullet} {sentence.strip()}' for sentence in sentences if sentence.strip())
141
+ return formatted_text
142
+
143
+ def is_cxr(image_path):
144
+ """
145
+ Checks if the uploaded image is a Chest X-ray using basic image processing.
146
+
147
+ Args:
148
+ image_path (str): Path to the uploaded image.
149
+
150
+ Returns:
151
+ bool: True if the image is likely a Chest X-ray, False otherwise.
152
+ """
153
+ try:
154
+
155
+ image = cv2.imread(image_path)
156
+
157
+ if image is None:
158
+ raise ValueError("Invalid image path.")
159
+
160
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
161
+ color_std = np.std(image, axis=2).mean()
162
+
163
+ if color_std > 0:
164
+ return False
165
+
166
+ return True
167
+
168
+ except Exception as e:
169
+ print(f"Error processing image: {e}")
170
+ return False
171
+
172
+ def dicom_to_png(dicom_file, png_file):
173
+ # Load DICOM file
174
+ dicom_data = pydicom.dcmread(dicom_file)
175
+ dicom_data.PhotometricInterpretation = 'MONOCHROME1'
176
+
177
+ # Normalize pixel values to 0-255
178
+ img = dicom_data.pixel_array
179
+ img = img.astype(np.float32)
180
+
181
+ img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX)
182
+ img = img.astype(np.uint8)
183
+
184
+ # Save as PNG
185
+ cv2.imwrite(png_file, img)
186
+ return img
187
+
188
+
189
+ Image_Captioner = pipeline("image-to-text", model = "./Model/", device = 0)
190
+
191
+ data_dir = "./CAM-Result"
192
+
193
+ @spaces.GPU(duration=300)
194
+ def xray_report_generator(Image_file, Query):
195
+ if Image_file[-4:] =='.dcm':
196
+ png_file = 'DCM2PNG.png'
197
+ dicom_to_png(Image_file, png_file)
198
+ Image_file = os.path.join(data_dir, png_file)
199
+ output = Image_Captioner(Image_file, max_new_tokens=512)
200
+
201
+ else:
202
+ output = Image_Captioner(Image_file, max_new_tokens=512)
203
+
204
+ result = output[0]['generated_text']
205
+ output_paragraph = sentence_case(result)
206
+
207
+ final_response = num2sym_bullets(output_paragraph, bullet='-')
208
+
209
+ query_prompt = f""" You are analyzing the doctor's query based on the patient's history and the generated chest X-ray report. Extract only the information relevant to the query.
210
+ If the report mentions the queried condition, write only the exact wording without any introduction. If the condition is not mentioned, respond with: 'No relevant findings related to [query condition].'.
211
+ """
212
+
213
+ #If the condition is negated, respond with: 'There is no [query condition].'.
214
+
215
+ completion = client.chat.completions.create(
216
+ model="gpt-4-turbo", ### gpt-4-turbo ### gpt-3.5-turbo-0125
217
+ messages=[
218
+ {"role": "system", "content": query_prompt},
219
+ {"role": "user", "content": f"Generated Report: {final_response}\nHistory/Doctor's Query: {Query}"}
220
+ ],
221
+ temperature=0.2)
222
+ query_response = completion.choices[0].message.content
223
+
224
+ generate_gradcam(Image_file, model_path, output_path, method='gradcam', use_cuda=True)
225
+
226
+ grad_cam_image = output_path + 'gradcam_result.png'
227
+
228
+ return grad_cam_image, final_response, query_response
229
+
230
+
231
+ # def save_feedback(feedback):
232
+ # feedback_dir = "Chayan/Feedback/" # Update this to your desired directory
233
+ # if not os.path.exists(feedback_dir):
234
+ # os.makedirs(feedback_dir)
235
+ # feedback_file = os.path.join(feedback_dir, "feedback.txt")
236
+ # with open(feedback_file, "a") as f:
237
+ # f.write(feedback + "\n")
238
+ # return "Feedback submitted successfully!"
239
+
240
+
241
+ def save_feedback(feedback):
242
+ feedback_dir = "Chayan/Feedback/" # Update this to your desired directory
243
+ if not os.path.exists(feedback_dir):
244
+ os.makedirs(feedback_dir)
245
+ feedback_file = os.path.join(feedback_dir, "feedback.txt")
246
+
247
+ try:
248
+ with open(feedback_file, "a") as f:
249
+ f.write(feedback + "\n")
250
+ print(f"Feedback saved at: {feedback_file}")
251
+ return "Feedback submitted successfully!"
252
+ except Exception as e:
253
+ print(f"Error saving feedback: {e}")
254
+ return "Failed to submit feedback!"
255
+
256
+
257
+ # Custom Theme Definition
258
+ class Seafoam(Base):
259
+ def __init__(
260
+ self,
261
+ *,
262
+ primary_hue: Union[colors.Color, str] = colors.emerald,
263
+ secondary_hue: Union[colors.Color, str] = colors.blue,
264
+ neutral_hue: Union[colors.Color, str] = colors.gray,
265
+ spacing_size: Union[sizes.Size, str] = sizes.spacing_md,
266
+ radius_size: Union[sizes.Size, str] = sizes.radius_md,
267
+ text_size: Union[sizes.Size, str] = sizes.text_lg,
268
+ font: Union[fonts.Font, str, Iterable[Union[fonts.Font, str]]] = (
269
+ fonts.GoogleFont("Quicksand"),
270
+ "ui-sans-serif",
271
+ "sans-serif",
272
+ ),
273
+ font_mono: Union[fonts.Font, str, Iterable[Union[fonts.Font, str]]] = (
274
+ fonts.GoogleFont("IBM Plex Mono"),
275
+ "ui-monospace",
276
+ "monospace",
277
+ ),
278
+ ):
279
+ super().__init__(
280
+ primary_hue=primary_hue,
281
+ secondary_hue=secondary_hue,
282
+ neutral_hue=neutral_hue,
283
+ spacing_size=spacing_size,
284
+ radius_size=radius_size,
285
+ text_size=text_size,
286
+ font=font,
287
+ font_mono=font_mono,
288
+ )
289
+
290
+ self.set(
291
+ body_background_fill="linear-gradient(114.2deg, rgba(184,215,21,1) -15.3%, rgba(21,215,98,1) 14.5%, rgba(21,215,182,1) 38.7%, rgba(129,189,240,1) 58.8%, rgba(219,108,205,1) 77.3%, rgba(240,129,129,1) 88.5%)"
292
+ )
293
+ # Initialize the theme
294
+ seafoam = Seafoam()
295
+
296
+
297
+
298
+ # Custom CSS styles
299
+ custom_css = """
300
+ <style>
301
+
302
+ /* Set background color for the entire Gradio app */
303
+ body, .gradio-container {
304
+ background-color: #f2f7f5 !important;
305
+ }
306
+
307
+ /* Optional: Add padding or margin for aesthetics */
308
+ .gradio-container {
309
+ padding: 20px;
310
+ }
311
+
312
+ #title {
313
+ color: green;
314
+ font-size: 36px;
315
+ font-weight: bold;
316
+ }
317
+ #description {
318
+ color: green;
319
+ font-size: 22px;
320
+ }
321
+
322
+ #title-row {
323
+ display: flex;
324
+ align-items: center;
325
+ gap: 10px;
326
+ margin-bottom: 0px;
327
+ }
328
+ #title-header h1 {
329
+ margin: 0;
330
+ }
331
+
332
+
333
+ #submit-btn {
334
+ background-color: #f5dec6; /* Banana leaf */
335
+ color: green;
336
+ padding: 15px 32px;
337
+ text-align: center;
338
+ text-decoration: none;
339
+ display: inline-block;
340
+ font-size: 30px;
341
+ margin: 4px 2px;
342
+ cursor: pointer;
343
+ }
344
+ #submit-btn:hover {
345
+ background-color: #00FFFF;
346
+ }
347
+
348
+
349
+ .intext textarea {
350
+ color: green;
351
+ font-size: 20px;
352
+ font-weight: bold;
353
+ }
354
+
355
+
356
+ .small-button {
357
+ color: green;
358
+ padding: 5px 10px;
359
+ font-size: 20px;
360
+ }
361
+
362
+ </style>
363
+ """
364
+
365
+ # Sample image paths
366
+ sample_images = [
367
+ "./Test-Images/0d930f0a-46f813a9-db3b137b-05142eef-eca3c5a7.jpg",
368
+ "./Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg",
369
+ "./Test-Images/6ff741e9-6ea01eef-1bf10153-d1b6beba-590b6620.jpg"
370
+ #"sample4.png",
371
+ #"sample5.png"
372
+ ]
373
+
374
+ def set_input_image(image_path):
375
+ return gr.update(value=image_path)
376
+
377
+ def show_contact_info():
378
+ yield gr.update(visible=True, value="""
379
+ **Contact Us:**
380
+ - Chayan Mondal
381
+ - Email: [email protected]
382
+ - Associate Prof. Sonny Pham
383
+ - Email: [email protected]
384
+ - Dr. Ashu Gupta
385
+ - Email: [email protected]
386
+ """)
387
+ # Wait for 20 seconds (you can adjust the time as needed)
388
+ time.sleep(20)
389
+ # Hide the content after 20 seconds
390
+ yield gr.update(visible=False)
391
+
392
+ def show_acknowledgment():
393
+ yield gr.update(visible=True, value="""
394
+ **Acknowledgment:**
395
+ This Research has been supported by the Western Australian Future Health Research and Innovation Fund.
396
+ """)
397
+ # Wait for 20 seconds
398
+ time.sleep(20)
399
+ # Hide the acknowledgment
400
+ yield gr.update(visible=False)
401
+
402
+
403
+ with gr.Blocks(theme=seafoam, css=custom_css) as demo:
404
+
405
+ #gr.HTML(custom_css) # Inject custom CSS
406
+
407
+
408
+ with gr.Row(elem_id="title-row"):
409
+ with gr.Column(scale=0):
410
+ gr.Image(
411
+ value="./AURA-CXR-Logo.png",
412
+ show_label=False,
413
+ width=60,
414
+ container=False
415
+ )
416
+ with gr.Column():
417
+ gr.Markdown(
418
+ """
419
+ <h1 style="color:blue; font-size: 32px; font-weight: bold; margin: 0;">
420
+ AURA-CXR: Explainable Diagnosis of Chest Diseases from X-rays
421
+ </h1>
422
+ """,
423
+ elem_id="title-header"
424
+ )
425
+
426
+ gr.Markdown(
427
+ "<p id='description'>Upload an X-ray image and get its report with heat-map visualization.</p>"
428
+ )
429
+
430
+
431
+
432
+ # gr.Markdown(
433
+ # """
434
+ # <h1 style="color:blue; font-size: 36px; font-weight: bold; margin: 0;">AURA-CXR: Explainable Diagnosis of Chest Diseases from X-rays</h1>
435
+ # <p id="description">Upload an X-ray image and get its report with heat-map visualization.</p>
436
+ # """
437
+ # )
438
+
439
+ #<h1 style="color:blue; font-size: 36px; font-weight: bold">AURA-CXR: Explainable Diagnosis of Chest Diseases from X-rays</h1>
440
+
441
+ with gr.Row():
442
+ inputs = gr.File(label="Upload Chest X-ray Image File", type="filepath")
443
+
444
+ with gr.Row():
445
+ with gr.Column(scale=1, min_width=300):
446
+ outputs1 = gr.Image(label="Image Viewer")
447
+ history_query = gr.Textbox(label="History/Doctor's Query", elem_classes="intext")
448
+ with gr.Column(scale=1, min_width=300):
449
+ outputs2 = gr.Image(label="Grad_CAM-Visualization")
450
+ with gr.Column(scale=1, min_width=300):
451
+ outputs3 = gr.Textbox(label="Generated Report", elem_classes = "intext")
452
+ outputs4 = gr.Textbox(label = "Query's Response", elem_classes = "intext")
453
+
454
+
455
+ submit_btn = gr.Button("Generate Report", elem_id="submit-btn", variant="primary")
456
+
457
+ def show_image(file_path):
458
+ if is_cxr(file_path): # Check if it's a valid Chest X-ray
459
+ return file_path, "Valid Image" # Show the image in Image Viewer
460
+ else:
461
+ return None, "Invalid image. Please upload a proper Chest X-ray."
462
+
463
+
464
+ # Show the uploaded image immediately in the Image Viewer
465
+ inputs.change(
466
+ fn=show_image, # Calls the function to return the same file path
467
+ inputs=inputs,
468
+ outputs=[outputs1, outputs3]
469
+ )
470
+
471
+
472
+
473
+
474
+ submit_btn.click(
475
+ fn=xray_report_generator,
476
+ inputs=[inputs,history_query],
477
+ outputs=[outputs2, outputs3, outputs4])
478
+
479
+
480
+ gr.Markdown(
481
+ """
482
+ <h2 style="color:green; font-size: 24px;">Or choose a sample image:</h2>
483
+ """
484
+ )
485
+
486
+ with gr.Row():
487
+ for idx, sample_image in enumerate(sample_images):
488
+ with gr.Column(scale=1):
489
+ #sample_image_component = gr.Image(value=sample_image, interactive=False)
490
+ select_button = gr.Button(f"Select Sample Image {idx+1}")
491
+ select_button.click(
492
+ fn=set_input_image,
493
+ inputs=gr.State(value=sample_image),
494
+ outputs=inputs
495
+ )
496
+
497
+
498
+ # Feedback section
499
+ gr.Markdown(
500
+ """
501
+ <h2 style="color:green; font-size: 24px;">Provide Your Valuable Feedback:</h2>
502
+ """
503
+ )
504
+
505
+ with gr.Row():
506
+ feedback_input = gr.Textbox(label="Your Feedback", lines=4, placeholder="Enter your feedback here...")
507
+ feedback_submit_btn = gr.Button("Submit Feedback", elem_classes="small-button", variant="secondary")
508
+ feedback_output = gr.Textbox(label="Feedback Status", interactive=False)
509
+
510
+
511
+
512
+ feedback_submit_btn.click(
513
+ fn=save_feedback,
514
+ inputs=feedback_input,
515
+ outputs=feedback_output
516
+ )
517
+
518
+
519
+ # Buttons and Markdown for Contact Us and Acknowledgment
520
+ with gr.Row():
521
+ contact_btn = gr.Button("Contact Us", elem_classes="small-button", variant="secondary")
522
+ ack_btn = gr.Button("Acknowledgment", elem_classes="small-button", variant="secondary")
523
+
524
+ contact_info = gr.Markdown(visible=False) # Initially hidden
525
+ acknowledgment_info = gr.Markdown(visible=False) # Initially hidden
526
+
527
+ # Update the content and make it visible when the buttons are clicked
528
+ contact_btn.click(fn=show_contact_info, outputs=contact_info, show_progress=False)
529
+ ack_btn.click(fn=show_acknowledgment, outputs=acknowledgment_info, show_progress=False)
530
+
531
+ # Update the content and make it visible when the buttons are clicked
532
+ # contact_btn.click(fn=show_contact_info, outputs=contact_info, show_progress=False)
533
+ # ack_btn.click(fn=show_acknowledgment, outputs=acknowledgment_info, show_progress=False)
534
+
535
+
536
+ demo.launch(share=True)
537
+
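The reshape_transform defined in this script is what lets the 2D CAM methods work on a transformer encoder: it converts the (batch, tokens, embed_dim) sequence coming out of the Swin stage into a (batch, embed_dim, height, width) feature map, padding or trimming the token dimension to fit the 12 x 12 grid. A minimal, self-contained sketch of that shape conversion follows; the 144-token, 1024-dimensional input is only an illustrative assumption, since the real sizes depend on the checkpoint in ./Model/.

import torch

def reshape_transform(tensor, height=12, width=12):
    # Same logic as in Chest_Xray_Report_Generator-Web-V2.py: pad or trim the token
    # dimension to height*width, then lay the tokens out on a 2D grid and move the
    # embedding dimension to the channel axis -> (batch, embed_dim, height, width).
    batch_size, token_number, embed_dim = tensor.size()
    if token_number < height * width:
        pad = torch.zeros(batch_size, height * width - token_number, embed_dim, device=tensor.device)
        tensor = torch.cat([tensor, pad], dim=1)
    elif token_number > height * width:
        tensor = tensor[:, :height * width, :]
    result = tensor.reshape(batch_size, height, width, embed_dim)
    return result.transpose(2, 3).transpose(1, 2)

tokens = torch.randn(1, 144, 1024)       # hypothetical Swin-stage output: 144 tokens, 1024-dim
print(reshape_transform(tokens).shape)   # torch.Size([1, 1024, 12, 12])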
Feedback/feedback.txt ADDED
File without changes
Model/config.json ADDED
The diff for this file is too large to render. See raw diff
 
Model/generation_config.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "bos_token_id": 50256,
3
+ "eos_token_id": 50256,
4
+ "max_length": 200,
5
+ "transformers_version": "4.37.1"
6
+ }
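These generation defaults (GPT-2's <|endoftext|> id 50256 as BOS/EOS, max_length 200) are picked up automatically when the checkpoint is loaded. A short sketch, assuming a recent transformers release that reads generation_config.json into model.generation_config; note that the web app above overrides the length at call time with max_new_tokens=512.

from transformers import VisionEncoderDecoderModel

model = VisionEncoderDecoderModel.from_pretrained("./Model/")
print(model.generation_config.max_length)    # 200
print(model.generation_config.eos_token_id)  # 50256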
Model/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb50b4debdf509c1f8c4dbbf344031528969f45426d358c62d39edcad08452ea
3
+ size 965957568
Model/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.485,
7
+ 0.456,
8
+ 0.406
9
+ ],
10
+ "feature_extractor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.229,
13
+ 0.224,
14
+ 0.225
15
+ ],
16
+ "resample": 3,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 384,
20
+ "width": 384
21
+ }
22
+ }
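This preprocessor configuration is what the image-to-text pipeline applies before the encoder: resize to 384 x 384, rescale by 1/255, then normalize with the ImageNet mean and std listed above. A small sketch of loading and running it directly, assuming a recent transformers release and using one of the test images from this commit; note that the Grad-CAM path in Chest_Xray_Report_Generator-Web-V2.py normalizes with mean/std 0.5 instead.

from transformers import AutoImageProcessor
from PIL import Image

processor = AutoImageProcessor.from_pretrained("./Model/")
img = Image.open("./Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg").convert("RGB")
inputs = processor(images=img, return_tensors="pt")
# Resized to 384x384, rescaled by 1/255, normalized with the ImageNet statistics above.
print(inputs["pixel_values"].shape)   # torch.Size([1, 3, 384, 384])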
Model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ed7612de6b8d4c06ccacb9ae48d72f25eaa405bb7d12ebc21c86121cca30197
3
+ size 14575
Model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f8ff02ac948318fd4b1db36c6dc3626126a027e501925cfc3bd76ac45c3505
3
+ size 627
Model/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
Model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Model/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
+ }
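The decoder side uses a GPT-2 tokenizer with <|endoftext|> (id 50256) serving as BOS, EOS, and padding token, matching generation_config.json above. A quick round-trip sketch, assuming transformers is installed; the sample sentence is only illustrative of the '<num>.' bullet markers the app later rewrites.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./Model/")
ids = tokenizer("<num>. the heart size is normal.", return_tensors="pt").input_ids
print(tokenizer.decode(ids[0], skip_special_tokens=True))   # round-trips the text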
Model/trainer_state.json ADDED
@@ -0,0 +1,536 @@
1
+ {
2
+ "best_metric": 0.0629316121339798,
3
+ "best_model_checkpoint": "./Swin-GPT2_Mimic/checkpoint-37500",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 37500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07,
13
+ "learning_rate": 4.9833333333333336e-05,
14
+ "loss": 0.1362,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.13,
19
+ "learning_rate": 4.966666666666667e-05,
20
+ "loss": 0.089,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.2,
25
+ "learning_rate": 4.9500000000000004e-05,
26
+ "loss": 0.0805,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.27,
31
+ "learning_rate": 4.933333333333334e-05,
32
+ "loss": 0.0779,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.33,
37
+ "learning_rate": 4.9166666666666665e-05,
38
+ "loss": 0.0775,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.4,
43
+ "learning_rate": 4.9e-05,
44
+ "loss": 0.0763,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.47,
49
+ "learning_rate": 4.883333333333334e-05,
50
+ "loss": 0.0749,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.53,
55
+ "learning_rate": 4.866666666666667e-05,
56
+ "loss": 0.0702,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.6,
61
+ "learning_rate": 4.85e-05,
62
+ "loss": 0.0701,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.67,
67
+ "learning_rate": 4.8333333333333334e-05,
68
+ "loss": 0.0715,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.73,
73
+ "learning_rate": 4.8166666666666674e-05,
74
+ "loss": 0.0725,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.8,
79
+ "learning_rate": 4.8e-05,
80
+ "loss": 0.0677,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.87,
85
+ "learning_rate": 4.7833333333333335e-05,
86
+ "loss": 0.0696,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.93,
91
+ "learning_rate": 4.766666666666667e-05,
92
+ "loss": 0.065,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 1.0,
97
+ "learning_rate": 4.75e-05,
98
+ "loss": 0.0646,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 1.0,
103
+ "eval_gen_len": 8.897,
104
+ "eval_loss": 0.06988305598497391,
105
+ "eval_rouge1": 34.7412,
106
+ "eval_rouge2": 25.6954,
107
+ "eval_rougeL": 34.4803,
108
+ "eval_rougeLsum": 34.7871,
109
+ "eval_runtime": 103.0848,
110
+ "eval_samples_per_second": 9.701,
111
+ "eval_steps_per_second": 1.213,
112
+ "step": 7500
113
+ },
114
+ {
115
+ "epoch": 1.07,
116
+ "learning_rate": 4.7333333333333336e-05,
117
+ "loss": 0.0651,
118
+ "step": 8000
119
+ },
120
+ {
121
+ "epoch": 1.13,
122
+ "learning_rate": 4.716666666666667e-05,
123
+ "loss": 0.0647,
124
+ "step": 8500
125
+ },
126
+ {
127
+ "epoch": 1.2,
128
+ "learning_rate": 4.7e-05,
129
+ "loss": 0.0644,
130
+ "step": 9000
131
+ },
132
+ {
133
+ "epoch": 1.27,
134
+ "learning_rate": 4.683333333333334e-05,
135
+ "loss": 0.0613,
136
+ "step": 9500
137
+ },
138
+ {
139
+ "epoch": 1.33,
140
+ "learning_rate": 4.666666666666667e-05,
141
+ "loss": 0.0664,
142
+ "step": 10000
143
+ },
144
+ {
145
+ "epoch": 1.4,
146
+ "learning_rate": 4.6500000000000005e-05,
147
+ "loss": 0.0631,
148
+ "step": 10500
149
+ },
150
+ {
151
+ "epoch": 1.47,
152
+ "learning_rate": 4.633333333333333e-05,
153
+ "loss": 0.0623,
154
+ "step": 11000
155
+ },
156
+ {
157
+ "epoch": 1.53,
158
+ "learning_rate": 4.6166666666666666e-05,
159
+ "loss": 0.0612,
160
+ "step": 11500
161
+ },
162
+ {
163
+ "epoch": 1.6,
164
+ "learning_rate": 4.600000000000001e-05,
165
+ "loss": 0.062,
166
+ "step": 12000
167
+ },
168
+ {
169
+ "epoch": 1.67,
170
+ "learning_rate": 4.5833333333333334e-05,
171
+ "loss": 0.0605,
172
+ "step": 12500
173
+ },
174
+ {
175
+ "epoch": 1.73,
176
+ "learning_rate": 4.566666666666667e-05,
177
+ "loss": 0.0619,
178
+ "step": 13000
179
+ },
180
+ {
181
+ "epoch": 1.8,
182
+ "learning_rate": 4.55e-05,
183
+ "loss": 0.062,
184
+ "step": 13500
185
+ },
186
+ {
187
+ "epoch": 1.87,
188
+ "learning_rate": 4.5333333333333335e-05,
189
+ "loss": 0.0622,
190
+ "step": 14000
191
+ },
192
+ {
193
+ "epoch": 1.93,
194
+ "learning_rate": 4.516666666666667e-05,
195
+ "loss": 0.06,
196
+ "step": 14500
197
+ },
198
+ {
199
+ "epoch": 2.0,
200
+ "learning_rate": 4.5e-05,
201
+ "loss": 0.0597,
202
+ "step": 15000
203
+ },
204
+ {
205
+ "epoch": 2.0,
206
+ "eval_gen_len": 14.724,
207
+ "eval_loss": 0.06516863405704498,
208
+ "eval_rouge1": 38.0809,
209
+ "eval_rouge2": 26.9533,
210
+ "eval_rougeL": 37.259,
211
+ "eval_rougeLsum": 37.8078,
212
+ "eval_runtime": 113.6453,
213
+ "eval_samples_per_second": 8.799,
214
+ "eval_steps_per_second": 1.1,
215
+ "step": 15000
216
+ },
217
+ {
218
+ "epoch": 2.07,
219
+ "learning_rate": 4.483333333333333e-05,
220
+ "loss": 0.0559,
221
+ "step": 15500
222
+ },
223
+ {
224
+ "epoch": 2.13,
225
+ "learning_rate": 4.466666666666667e-05,
226
+ "loss": 0.0595,
227
+ "step": 16000
228
+ },
229
+ {
230
+ "epoch": 2.2,
231
+ "learning_rate": 4.4500000000000004e-05,
232
+ "loss": 0.0569,
233
+ "step": 16500
234
+ },
235
+ {
236
+ "epoch": 2.27,
237
+ "learning_rate": 4.433333333333334e-05,
238
+ "loss": 0.0558,
239
+ "step": 17000
240
+ },
241
+ {
242
+ "epoch": 2.33,
243
+ "learning_rate": 4.4166666666666665e-05,
244
+ "loss": 0.0578,
245
+ "step": 17500
246
+ },
247
+ {
248
+ "epoch": 2.4,
249
+ "learning_rate": 4.4000000000000006e-05,
250
+ "loss": 0.0571,
251
+ "step": 18000
252
+ },
253
+ {
254
+ "epoch": 2.47,
255
+ "learning_rate": 4.383333333333334e-05,
256
+ "loss": 0.0586,
257
+ "step": 18500
258
+ },
259
+ {
260
+ "epoch": 2.53,
261
+ "learning_rate": 4.3666666666666666e-05,
262
+ "loss": 0.0577,
263
+ "step": 19000
264
+ },
265
+ {
266
+ "epoch": 2.6,
267
+ "learning_rate": 4.35e-05,
268
+ "loss": 0.0583,
269
+ "step": 19500
270
+ },
271
+ {
272
+ "epoch": 2.67,
273
+ "learning_rate": 4.3333333333333334e-05,
274
+ "loss": 0.0574,
275
+ "step": 20000
276
+ },
277
+ {
278
+ "epoch": 2.73,
279
+ "learning_rate": 4.316666666666667e-05,
280
+ "loss": 0.0563,
281
+ "step": 20500
282
+ },
283
+ {
284
+ "epoch": 2.8,
285
+ "learning_rate": 4.3e-05,
286
+ "loss": 0.057,
287
+ "step": 21000
288
+ },
289
+ {
290
+ "epoch": 2.87,
291
+ "learning_rate": 4.2833333333333335e-05,
292
+ "loss": 0.0559,
293
+ "step": 21500
294
+ },
295
+ {
296
+ "epoch": 2.93,
297
+ "learning_rate": 4.266666666666667e-05,
298
+ "loss": 0.0565,
299
+ "step": 22000
300
+ },
301
+ {
302
+ "epoch": 3.0,
303
+ "learning_rate": 4.25e-05,
304
+ "loss": 0.0577,
305
+ "step": 22500
306
+ },
307
+ {
308
+ "epoch": 3.0,
309
+ "eval_gen_len": 13.501,
310
+ "eval_loss": 0.06393314898014069,
311
+ "eval_rouge1": 37.8142,
312
+ "eval_rouge2": 26.9542,
313
+ "eval_rougeL": 37.076,
314
+ "eval_rougeLsum": 37.5874,
315
+ "eval_runtime": 112.3223,
316
+ "eval_samples_per_second": 8.903,
317
+ "eval_steps_per_second": 1.113,
318
+ "step": 22500
319
+ },
320
+ {
321
+ "epoch": 3.07,
322
+ "learning_rate": 4.233333333333334e-05,
323
+ "loss": 0.0511,
324
+ "step": 23000
325
+ },
326
+ {
327
+ "epoch": 3.13,
328
+ "learning_rate": 4.216666666666667e-05,
329
+ "loss": 0.0526,
330
+ "step": 23500
331
+ },
332
+ {
333
+ "epoch": 3.2,
334
+ "learning_rate": 4.2e-05,
335
+ "loss": 0.0514,
336
+ "step": 24000
337
+ },
338
+ {
339
+ "epoch": 3.27,
340
+ "learning_rate": 4.183333333333334e-05,
341
+ "loss": 0.053,
342
+ "step": 24500
343
+ },
344
+ {
345
+ "epoch": 3.33,
346
+ "learning_rate": 4.166666666666667e-05,
347
+ "loss": 0.0526,
348
+ "step": 25000
349
+ },
350
+ {
351
+ "epoch": 3.4,
352
+ "learning_rate": 4.15e-05,
353
+ "loss": 0.0542,
354
+ "step": 25500
355
+ },
356
+ {
357
+ "epoch": 3.47,
358
+ "learning_rate": 4.133333333333333e-05,
359
+ "loss": 0.0533,
360
+ "step": 26000
361
+ },
362
+ {
363
+ "epoch": 3.53,
364
+ "learning_rate": 4.116666666666667e-05,
365
+ "loss": 0.0537,
366
+ "step": 26500
367
+ },
368
+ {
369
+ "epoch": 3.6,
370
+ "learning_rate": 4.1e-05,
371
+ "loss": 0.0519,
372
+ "step": 27000
373
+ },
374
+ {
375
+ "epoch": 3.67,
376
+ "learning_rate": 4.0833333333333334e-05,
377
+ "loss": 0.0532,
378
+ "step": 27500
379
+ },
380
+ {
381
+ "epoch": 3.73,
382
+ "learning_rate": 4.066666666666667e-05,
383
+ "loss": 0.0538,
384
+ "step": 28000
385
+ },
386
+ {
387
+ "epoch": 3.8,
388
+ "learning_rate": 4.05e-05,
389
+ "loss": 0.0533,
390
+ "step": 28500
391
+ },
392
+ {
393
+ "epoch": 3.87,
394
+ "learning_rate": 4.0333333333333336e-05,
395
+ "loss": 0.0544,
396
+ "step": 29000
397
+ },
398
+ {
399
+ "epoch": 3.93,
400
+ "learning_rate": 4.016666666666667e-05,
401
+ "loss": 0.0536,
402
+ "step": 29500
403
+ },
404
+ {
405
+ "epoch": 4.0,
406
+ "learning_rate": 4e-05,
407
+ "loss": 0.0528,
408
+ "step": 30000
409
+ },
410
+ {
411
+ "epoch": 4.0,
412
+ "eval_gen_len": 11.784,
413
+ "eval_loss": 0.06298327445983887,
414
+ "eval_rouge1": 37.8876,
415
+ "eval_rouge2": 26.9586,
416
+ "eval_rougeL": 37.2585,
417
+ "eval_rougeLsum": 37.7378,
418
+ "eval_runtime": 109.3283,
419
+ "eval_samples_per_second": 9.147,
420
+ "eval_steps_per_second": 1.143,
421
+ "step": 30000
422
+ },
423
+ {
424
+ "epoch": 4.07,
425
+ "learning_rate": 3.983333333333333e-05,
426
+ "loss": 0.0488,
427
+ "step": 30500
428
+ },
429
+ {
430
+ "epoch": 4.13,
431
+ "learning_rate": 3.966666666666667e-05,
432
+ "loss": 0.0475,
433
+ "step": 31000
434
+ },
435
+ {
436
+ "epoch": 4.2,
437
+ "learning_rate": 3.9500000000000005e-05,
438
+ "loss": 0.0487,
439
+ "step": 31500
440
+ },
441
+ {
442
+ "epoch": 4.27,
443
+ "learning_rate": 3.933333333333333e-05,
444
+ "loss": 0.0493,
445
+ "step": 32000
446
+ },
447
+ {
448
+ "epoch": 4.33,
449
+ "learning_rate": 3.9166666666666665e-05,
450
+ "loss": 0.0482,
451
+ "step": 32500
452
+ },
453
+ {
454
+ "epoch": 4.4,
455
+ "learning_rate": 3.9000000000000006e-05,
456
+ "loss": 0.0504,
457
+ "step": 33000
458
+ },
459
+ {
460
+ "epoch": 4.47,
461
+ "learning_rate": 3.883333333333333e-05,
462
+ "loss": 0.0495,
463
+ "step": 33500
464
+ },
465
+ {
466
+ "epoch": 4.53,
467
+ "learning_rate": 3.866666666666667e-05,
468
+ "loss": 0.0477,
469
+ "step": 34000
470
+ },
471
+ {
472
+ "epoch": 4.6,
473
+ "learning_rate": 3.85e-05,
474
+ "loss": 0.049,
475
+ "step": 34500
476
+ },
477
+ {
478
+ "epoch": 4.67,
479
+ "learning_rate": 3.8333333333333334e-05,
480
+ "loss": 0.0483,
481
+ "step": 35000
482
+ },
483
+ {
484
+ "epoch": 4.73,
485
+ "learning_rate": 3.816666666666667e-05,
486
+ "loss": 0.0509,
487
+ "step": 35500
488
+ },
489
+ {
490
+ "epoch": 4.8,
491
+ "learning_rate": 3.8e-05,
492
+ "loss": 0.0505,
493
+ "step": 36000
494
+ },
495
+ {
496
+ "epoch": 4.87,
497
+ "learning_rate": 3.7833333333333336e-05,
498
+ "loss": 0.0506,
499
+ "step": 36500
500
+ },
501
+ {
502
+ "epoch": 4.93,
503
+ "learning_rate": 3.766666666666667e-05,
504
+ "loss": 0.049,
505
+ "step": 37000
506
+ },
507
+ {
508
+ "epoch": 5.0,
509
+ "learning_rate": 3.7500000000000003e-05,
510
+ "loss": 0.0485,
511
+ "step": 37500
512
+ },
513
+ {
514
+ "epoch": 5.0,
515
+ "eval_gen_len": 14.157,
516
+ "eval_loss": 0.0629316121339798,
517
+ "eval_rouge1": 39.0822,
518
+ "eval_rouge2": 27.4073,
519
+ "eval_rougeL": 38.1885,
520
+ "eval_rougeLsum": 38.8776,
521
+ "eval_runtime": 112.3853,
522
+ "eval_samples_per_second": 8.898,
523
+ "eval_steps_per_second": 1.112,
524
+ "step": 37500
525
+ }
526
+ ],
527
+ "logging_steps": 500,
528
+ "max_steps": 150000,
529
+ "num_input_tokens_seen": 0,
530
+ "num_train_epochs": 20,
531
+ "save_steps": 500,
532
+ "total_flos": 1.601193167290368e+20,
533
+ "train_batch_size": 8,
534
+ "trial_name": null,
535
+ "trial_params": null
536
+ }
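trainer_state.json logs a training row every 500 steps and one evaluation row per epoch; the shipped checkpoint is the best one so far (eval_loss 0.0629 at step 37500, epoch 5 of a planned 20). A short sketch for pulling out the per-epoch evaluation metrics, using only the standard library:

import json

with open("./Model/trainer_state.json") as f:
    state = json.load(f)

# Evaluation rows carry eval_* keys; training rows only carry loss/learning_rate.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"epoch {entry['epoch']:.0f}: eval_loss={entry['eval_loss']:.4f}, "
              f"rougeL={entry['eval_rougeL']}, gen_len={entry['eval_gen_len']}")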
Model/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dd77040949647fa4b081f2f6be19d1ed5b3019d92fd8ecb74d288af93cd6290
3
+ size 4411
Model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
Test-Images/0d930f0a-46f813a9-db3b137b-05142eef-eca3c5a7.jpg ADDED

Git LFS Details

  • SHA256: f80804e8e2532e0bb3665c1790380728b67eb79f0fc4d6c0b9163d2596ea5ff3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.93 MB
Test-Images/6ff741e9-6ea01eef-1bf10153-d1b6beba-590b6620.jpg ADDED

Git LFS Details

  • SHA256: 7c1aa5b4227347d97ace457c47d05f4df8aadc2df32b48ffac0dd4fe625f59ac
  • Pointer size: 132 Bytes
  • Size of remote file: 1.77 MB
Test-Images/93681764-ec39480e-0518b12c-199850c2-f15118ab.jpg ADDED

Git LFS Details

  • SHA256: 4c133343b1322cd385660c297657f864e0c1e905147088e8c514caa0225b978e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.74 MB
pytorch_grad_cam/Readme.md ADDED
@@ -0,0 +1,29 @@
1
+ #### Grad-CAM visualization of any VisionEncoderDecoder model
2
+
3
+ # Step 1: Open the /pytorch_grad_cam folder and make sure that __init__.py imports every CAM variant by its class name, not by its Python file name. For example
4
+ from pytorch_grad_cam.grad_cam import GradCAM
5
+ because the main script (Grad_CAM_Visualization.py) imports every class directly.
6
+
7
+ # Step 2: Open the main Grad-CAM script, Grad_CAM_Visualization.py, and edit the following function to match your model.
8
+ # "def reshape_transform(tensor, height=14, width=14):
9
+ result = tensor[:, 1:, :].reshape(tensor.size(0),
10
+ height, width, tensor.size(2))
11
+ result = result.transpose(2, 3).transpose(1, 2)
12
+ # return result"
13
+ Here the resized image tensor has 150,528 elements, which must match the reshaped size of [1, 14, 14, 768].
14
+ ## If there is a mismatch, the error message looks like this:
15
+ RuntimeError: shape '[1, 16, 16, 768]' is invalid for input of size 150528
16
+
17
+ # Step 3: Choose your desired model from (DeiT_Base16 pretrained on ImageNet, customized VisionTransformer, DINO_Base16 pretrained on ImageNet, my customized DeiT-CXR model, my customized EfficientNet model, and the ##VisionEncoderDecoder model)
18
+
19
+ # Step 4: Open the base_cam.py file and go to the "forward" function of class BaseCAM.
20
+ Add the extra line "outputs = outputs.pooler_output" for the ##VisionEncoderDecoder model, because we need the pooler_output tensor from the model's output. Follow the comment line as well.
21
+
22
+ # Step 5: Then follow the comments in Grad_CAM_Visualization.py:
23
+ use model.encoder instead of model for the ## VisionEncoderDecoder model
24
+ use different target_layers for different models
25
+ target_layers = [model.encoder.encoder.layer[-1].layernorm_before] for the ## VisionEncoderDecoder model
26
+
27
+ # Step 6: Change the image_path and output_path accordingly
28
+
29
+ # Step 7: Run python Grad_CAM_Visualization.py --use-cuda --image-path "directory/image_path" --method "any grad-cam method defined in the code"
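As a companion to Step 2, here is the ViT-style reshape_transform from the Readme as a runnable sketch, so the size arithmetic is easy to check: 1 x 14 x 14 x 768 = 150,528 elements, which is why the 16 x 16 grid in the example error message cannot hold the same tensor. It assumes PyTorch and a standard ViT-Base/16 token layout with one CLS token.

import torch

def reshape_transform(tensor, height=14, width=14):
    # Drop the CLS token, lay the remaining 196 patch tokens out on a 14x14 grid,
    # then move the 768-dim embedding to the channel axis -> (batch, 768, 14, 14).
    result = tensor[:, 1:, :].reshape(tensor.size(0), height, width, tensor.size(2))
    return result.transpose(2, 3).transpose(1, 2)

tokens = torch.randn(1, 197, 768)         # 1 CLS token + 196 patch tokens
print(reshape_transform(tokens).shape)    # torch.Size([1, 768, 14, 14])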
pytorch_grad_cam/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ from pytorch_grad_cam.grad_cam import GradCAM
2
+ from pytorch_grad_cam.hirescam import HiResCAM
3
+ from pytorch_grad_cam.grad_cam_elementwise import GradCAMElementWise
4
+ from pytorch_grad_cam.ablation_layer import AblationLayer, AblationLayerVit, AblationLayerFasterRCNN
5
+ from pytorch_grad_cam.ablation_cam import AblationCAM
6
+ from pytorch_grad_cam.xgrad_cam import XGradCAM
7
+ from pytorch_grad_cam.grad_cam_plusplus import GradCAMPlusPlus
8
+ from pytorch_grad_cam.score_cam import ScoreCAM
9
+ from pytorch_grad_cam.layer_cam import LayerCAM
10
+ from pytorch_grad_cam.eigen_cam import EigenCAM
11
+ from pytorch_grad_cam.eigen_grad_cam import EigenGradCAM
12
+ from pytorch_grad_cam.random_cam import RandomCAM
13
+ from pytorch_grad_cam.fullgrad_cam import FullGrad
14
+ from pytorch_grad_cam.guided_backprop import GuidedBackpropReLUModel
15
+ from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
16
+ from pytorch_grad_cam.feature_factorization.deep_feature_factorization import DeepFeatureFactorization, run_dff_on_image
17
+ import pytorch_grad_cam.utils.model_targets
18
+ import pytorch_grad_cam.utils.reshape_transforms
19
+ import pytorch_grad_cam.metrics.cam_mult_image
20
+ import pytorch_grad_cam.metrics.road
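For orientation, this is the minimal way the exported classes are typically used upstream; a hedged sketch on a small torchvision CNN so no reshape_transform is needed. It assumes torchvision is installed and that the vendored base_cam.py still handles plain classifier logits (the Readme above notes a pooler_output tweak that is specific to VisionEncoderDecoder encoders).

import numpy as np
import torch
from torchvision.models import resnet18
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image

model = resnet18(weights=None).eval()
cam = GradCAM(model=model, target_layers=[model.layer4[-1]])

input_tensor = torch.randn(1, 3, 224, 224)                            # stand-in for a preprocessed image
grayscale_cam = cam(input_tensor=input_tensor, targets=None)[0, :]    # (224, 224) heatmap in [0, 1]

rgb_img = np.random.rand(224, 224, 3).astype(np.float32)              # stand-in for the float image in [0, 1]
overlay = show_cam_on_image(rgb_img, grayscale_cam)                   # uint8 overlay, same call the app uses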
pytorch_grad_cam/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.57 kB). View file
 
pytorch_grad_cam/__pycache__/ablation_cam.cpython-39.pyc ADDED
Binary file (3.7 kB). View file
 
pytorch_grad_cam/__pycache__/ablation_layer.cpython-39.pyc ADDED
Binary file (5.25 kB). View file
 
pytorch_grad_cam/__pycache__/activations_and_gradients.cpython-39.pyc ADDED
Binary file (1.9 kB). View file
 
pytorch_grad_cam/__pycache__/base_cam.cpython-39.pyc ADDED
Binary file (5.86 kB). View file
 
pytorch_grad_cam/__pycache__/eigen_cam.cpython-39.pyc ADDED
Binary file (948 Bytes). View file
 
pytorch_grad_cam/__pycache__/eigen_grad_cam.cpython-39.pyc ADDED
Binary file (942 Bytes). View file
 
pytorch_grad_cam/__pycache__/fullgrad_cam.cpython-39.pyc ADDED
Binary file (3.2 kB). View file
 
pytorch_grad_cam/__pycache__/grad_cam.cpython-39.pyc ADDED
Binary file (889 Bytes). View file
 
pytorch_grad_cam/__pycache__/grad_cam_elementwise.cpython-39.pyc ADDED
Binary file (1.11 kB). View file
 
pytorch_grad_cam/__pycache__/grad_cam_plusplus.cpython-39.pyc ADDED
Binary file (1.14 kB). View file
 
pytorch_grad_cam/__pycache__/guided_backprop.cpython-39.pyc ADDED
Binary file (3.43 kB). View file
 
pytorch_grad_cam/__pycache__/hirescam.cpython-39.pyc ADDED
Binary file (1.14 kB). View file
 
pytorch_grad_cam/__pycache__/layer_cam.cpython-39.pyc ADDED
Binary file (1.07 kB). View file
 
pytorch_grad_cam/__pycache__/random_cam.cpython-39.pyc ADDED
Binary file (938 Bytes). View file
 
pytorch_grad_cam/__pycache__/score_cam.cpython-39.pyc ADDED
Binary file (1.97 kB). View file
 
pytorch_grad_cam/__pycache__/xgrad_cam.cpython-39.pyc ADDED
Binary file (1 kB). View file
 
pytorch_grad_cam/ablation_cam.py ADDED
@@ -0,0 +1,148 @@
1
+ import numpy as np
2
+ import torch
3
+ import tqdm
4
+ from typing import Callable, List
5
+ from pytorch_grad_cam.base_cam import BaseCAM
6
+ from pytorch_grad_cam.utils.find_layers import replace_layer_recursive
7
+ from pytorch_grad_cam.ablation_layer import AblationLayer
8
+
9
+
10
+ """ Implementation of AblationCAM
11
+ https://openaccess.thecvf.com/content_WACV_2020/papers/Desai_Ablation-CAM_Visual_Explanations_for_Deep_Convolutional_Network_via_Gradient-free_Localization_WACV_2020_paper.pdf
12
+
13
+ Ablate individual activations, and then measure the drop in the target score.
14
+
15
+ In the current implementation, the target layer activations are cached, so they won't be re-computed.
16
+ However, layers before it, if any, will not be cached.
17
+ This means that if the target layer is a large block, for example model.features (in VGG), there will
18
+ be a large saving in run time.
19
+
20
+ Since we have to go over many channels and ablate them, and every channel ablation requires a forward pass,
21
+ it would be nice if we could avoid doing that for channels that won't contribute anyway, making it much faster.
22
+ The parameter ratio_channels_to_ablate controls how many channels should be ablated, using an experimental method
23
+ (to be improved). The default 1.0 value means that all channels will be ablated.
24
+ """
25
+
26
+
27
+ class AblationCAM(BaseCAM):
28
+ def __init__(self,
29
+ model: torch.nn.Module,
30
+ target_layers: List[torch.nn.Module],
31
+ use_cuda: bool = False,
32
+ reshape_transform: Callable = None,
33
+ ablation_layer: torch.nn.Module = AblationLayer(),
34
+ batch_size: int = 32,
35
+ ratio_channels_to_ablate: float = 1.0) -> None:
36
+
37
+ super(AblationCAM, self).__init__(model,
38
+ target_layers,
39
+ use_cuda,
40
+ reshape_transform,
41
+ uses_gradients=False)
42
+ self.batch_size = batch_size
43
+ self.ablation_layer = ablation_layer
44
+ self.ratio_channels_to_ablate = ratio_channels_to_ablate
45
+
46
+ def save_activation(self, module, input, output) -> None:
47
+ """ Helper function to save the raw activations from the target layer """
48
+ self.activations = output
49
+
50
+ def assemble_ablation_scores(self,
51
+ new_scores: list,
52
+ original_score: float,
53
+ ablated_channels: np.ndarray,
54
+ number_of_channels: int) -> np.ndarray:
55
+ """ Take the value from the channels that were ablated,
56
+ and just set the original score for the channels that were skipped """
57
+
58
+ index = 0
59
+ result = []
60
+ sorted_indices = np.argsort(ablated_channels)
61
+ ablated_channels = ablated_channels[sorted_indices]
62
+ new_scores = np.float32(new_scores)[sorted_indices]
63
+
64
+ for i in range(number_of_channels):
65
+ if index < len(ablated_channels) and ablated_channels[index] == i:
66
+ weight = new_scores[index]
67
+ index = index + 1
68
+ else:
69
+ weight = original_score
70
+ result.append(weight)
71
+
72
+ return result
73
+
74
+ def get_cam_weights(self,
75
+ input_tensor: torch.Tensor,
76
+ target_layer: torch.nn.Module,
77
+ targets: List[Callable],
78
+ activations: torch.Tensor,
79
+ grads: torch.Tensor) -> np.ndarray:
80
+
81
+ # Do a forward pass, compute the target scores, and cache the
82
+ # activations
83
+ handle = target_layer.register_forward_hook(self.save_activation)
84
+ with torch.no_grad():
85
+ outputs = self.model(input_tensor)
86
+ handle.remove()
87
+ original_scores = np.float32(
88
+ [target(output).cpu().item() for target, output in zip(targets, outputs)])
89
+
90
+ # Replace the layer with the ablation layer.
91
+ # When we finish, we will replace it back, so the original model is
92
+ # unchanged.
93
+ ablation_layer = self.ablation_layer
94
+ replace_layer_recursive(self.model, target_layer, ablation_layer)
95
+
96
+ number_of_channels = activations.shape[1]
97
+ weights = []
98
+ # This is a "gradient free" method, so we don't need gradients here.
99
+ with torch.no_grad():
100
+ # Loop over each of the batch images and ablate activations for it.
101
+ for batch_index, (target, tensor) in enumerate(
102
+ zip(targets, input_tensor)):
103
+ new_scores = []
104
+ batch_tensor = tensor.repeat(self.batch_size, 1, 1, 1)
105
+
106
+ # Check which channels should be ablated. Normally this will be all channels,
107
+ # But we can also try to speed this up by using a low
108
+ # ratio_channels_to_ablate.
109
+ channels_to_ablate = ablation_layer.activations_to_be_ablated(
110
+ activations[batch_index, :], self.ratio_channels_to_ablate)
111
+ number_channels_to_ablate = len(channels_to_ablate)
112
+
113
+ for i in tqdm.tqdm(
114
+ range(
115
+ 0,
116
+ number_channels_to_ablate,
117
+ self.batch_size)):
118
+ if i + self.batch_size > number_channels_to_ablate:
119
+ batch_tensor = batch_tensor[:(
120
+ number_channels_to_ablate - i)]
121
+
122
+ # Change the state of the ablation layer so it ablates the next channels.
123
+ # TBD: Move this into the ablation layer forward pass.
124
+ ablation_layer.set_next_batch(
125
+ input_batch_index=batch_index,
126
+ activations=self.activations,
127
+ num_channels_to_ablate=batch_tensor.size(0))
128
+ score = [target(o).cpu().item()
129
+ for o in self.model(batch_tensor)]
130
+ new_scores.extend(score)
131
+ ablation_layer.indices = ablation_layer.indices[batch_tensor.size(
132
+ 0):]
133
+
134
+ new_scores = self.assemble_ablation_scores(
135
+ new_scores,
136
+ original_scores[batch_index],
137
+ channels_to_ablate,
138
+ number_of_channels)
139
+ weights.extend(new_scores)
140
+
141
+ weights = np.float32(weights)
142
+ weights = weights.reshape(activations.shape[:2])
143
+ original_scores = original_scores[:, None]
144
+ weights = (original_scores - weights) / original_scores
145
+
146
+ # Replace the model back to the original state
147
+ replace_layer_recursive(self.model, ablation_layer, target_layer)
148
+ return weights
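A hedged construction sketch for the class above, showing the knobs its docstring describes; torchvision's resnet18 is only an illustrative stand-in, and for the ViT/Swin case the web app instead passes AblationLayerVit() plus a reshape_transform.

import torch
from torchvision.models import resnet18
from pytorch_grad_cam import AblationCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

model = resnet18(weights=None).eval()
cam = AblationCAM(model=model,
                  target_layers=[model.layer4[-1]],
                  batch_size=32,                      # channels ablated per forward pass
                  ratio_channels_to_ablate=0.5)       # ablate only the most promising half of the channels

input_tensor = torch.randn(1, 3, 224, 224)
targets = [ClassifierOutputTarget(0)]                 # measure the score drop for class index 0
grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]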
pytorch_grad_cam/ablation_cam_multilayer.py ADDED
@@ -0,0 +1,136 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import tqdm
5
+ from pytorch_grad_cam.base_cam import BaseCAM
6
+
7
+
8
+ class AblationLayer(torch.nn.Module):
9
+ def __init__(self, layer, reshape_transform, indices):
10
+ super(AblationLayer, self).__init__()
11
+
12
+ self.layer = layer
13
+ self.reshape_transform = reshape_transform
14
+ # The channels to zero out:
15
+ self.indices = indices
16
+
17
+ def forward(self, x):
18
+ self.__call__(x)
19
+
20
+ def __call__(self, x):
21
+ output = self.layer(x)
22
+
23
+ # Hack to work with ViT,
24
+ # Since the activation channels are last and not first like in CNNs
25
+ # Probably should remove it?
26
+ if self.reshape_transform is not None:
27
+ output = output.transpose(1, 2)
28
+
29
+ for i in range(output.size(0)):
30
+
31
+ # Commonly the minimum activation will be 0,
32
+ # And then it makes sense to zero it out.
33
+ # However depending on the architecture,
34
+ # If the values can be negative, we use very negative values
35
+ # to perform the ablation, deviating from the paper.
36
+ if torch.min(output) == 0:
37
+ output[i, self.indices[i], :] = 0
38
+ else:
39
+ ABLATION_VALUE = 1e5
40
+ output[i, self.indices[i], :] = torch.min(
41
+ output) - ABLATION_VALUE
42
+
43
+ if self.reshape_transform is not None:
44
+ output = output.transpose(2, 1)
45
+
46
+ return output
47
+
48
+
49
+ def replace_layer_recursive(model, old_layer, new_layer):
50
+ for name, layer in model._modules.items():
51
+ if layer == old_layer:
52
+ model._modules[name] = new_layer
53
+ return True
54
+ elif replace_layer_recursive(layer, old_layer, new_layer):
55
+ return True
56
+ return False
57
+
58
+
59
+ class AblationCAM(BaseCAM):
60
+ def __init__(self, model, target_layers, use_cuda=False,
61
+ reshape_transform=None):
62
+ super(AblationCAM, self).__init__(model, target_layers, use_cuda,
63
+ reshape_transform)
64
+
65
+ if len(target_layers) > 1:
66
+ print(
67
+ "Warning. You are usign Ablation CAM with more than 1 layers. "
68
+ "This is supported only if all layers have the same output shape")
69
+
70
+ def set_ablation_layers(self):
71
+ self.ablation_layers = []
72
+ for target_layer in self.target_layers:
73
+ ablation_layer = AblationLayer(target_layer,
74
+ self.reshape_transform, indices=[])
75
+ self.ablation_layers.append(ablation_layer)
76
+ replace_layer_recursive(self.model, target_layer, ablation_layer)
77
+
78
+ def unset_ablation_layers(self):
79
+ # replace the model back to the original state
80
+ for ablation_layer, target_layer in zip(
81
+ self.ablation_layers, self.target_layers):
82
+ replace_layer_recursive(self.model, ablation_layer, target_layer)
83
+
84
+ def set_ablation_layer_batch_indices(self, indices):
85
+ for ablation_layer in self.ablation_layers:
86
+ ablation_layer.indices = indices
87
+
88
+ def trim_ablation_layer_batch_indices(self, keep):
89
+ for ablation_layer in self.ablation_layers:
90
+ ablation_layer.indices = ablation_layer.indices[:keep]
91
+
92
+ def get_cam_weights(self,
93
+ input_tensor,
94
+ target_category,
95
+ activations,
96
+ grads):
97
+ with torch.no_grad():
98
+ outputs = self.model(input_tensor).cpu().numpy()
99
+ original_scores = []
100
+ for i in range(input_tensor.size(0)):
101
+ original_scores.append(outputs[i, target_category[i]])
102
+ original_scores = np.float32(original_scores)
103
+
104
+ self.set_ablation_layers()
105
+
106
+ if hasattr(self, "batch_size"):
107
+ BATCH_SIZE = self.batch_size
108
+ else:
109
+ BATCH_SIZE = 32
110
+
111
+ number_of_channels = activations.shape[1]
112
+ weights = []
113
+
114
+ with torch.no_grad():
115
+ # Iterate over the input batch
116
+ for tensor, category in zip(input_tensor, target_category):
117
+ batch_tensor = tensor.repeat(BATCH_SIZE, 1, 1, 1)
118
+ for i in tqdm.tqdm(range(0, number_of_channels, BATCH_SIZE)):
119
+ self.set_ablation_layer_batch_indices(
120
+ list(range(i, i + BATCH_SIZE)))
121
+
122
+ if i + BATCH_SIZE > number_of_channels:
123
+ keep = number_of_channels - i
124
+ batch_tensor = batch_tensor[:keep]
125
+ self.trim_ablation_layer_batch_indices(keep)
126
+ score = self.model(batch_tensor)[:, category].cpu().numpy()
127
+ weights.extend(score)
128
+
129
+ weights = np.float32(weights)
130
+ weights = weights.reshape(activations.shape[:2])
131
+ original_scores = original_scores[:, None]
132
+ weights = (original_scores - weights) / original_scores
133
+
134
+ # replace the model back to the original state
135
+ self.unset_ablation_layers()
136
+ return weights
pytorch_grad_cam/ablation_layer.py ADDED
@@ -0,0 +1,155 @@
+ import torch
+ from collections import OrderedDict
+ import numpy as np
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+
+
+ class AblationLayer(torch.nn.Module):
+     def __init__(self):
+         super(AblationLayer, self).__init__()
+
+     def objectiveness_mask_from_svd(self, activations, threshold=0.01):
+         """ Experimental method to get a binary mask to decide if the activation is worth ablating.
+             The idea is to apply the EigenCAM method by doing PCA on the activations.
+             Then we create a binary mask by comparing to a low threshold.
+             Areas that are masked out are probably not interesting anyway.
+         """
+         projection = get_2d_projection(activations[None, :])[0, :]
+         projection = np.abs(projection)
+         projection = projection - projection.min()
+         projection = projection / projection.max()
+         projection = projection > threshold
+         return projection
+
+     def activations_to_be_ablated(
+             self,
+             activations,
+             ratio_channels_to_ablate=1.0):
+         """ Experimental method to decide which activation channels are worth ablating.
+             Create a binary CAM mask with objectiveness_mask_from_svd.
+             Score each activation channel by how much of its values fall inside the mask.
+             Then keep the top channels.
+         """
+         if ratio_channels_to_ablate == 1.0:
+             self.indices = np.int32(range(activations.shape[0]))
+             return self.indices
+
+         projection = self.objectiveness_mask_from_svd(activations)
+
+         scores = []
+         for channel in activations:
+             normalized = np.abs(channel)
+             normalized = normalized - normalized.min()
+             normalized = normalized / np.max(normalized)
+             score = (projection * normalized).sum() / normalized.sum()
+             scores.append(score)
+         scores = np.float32(scores)
+
+         indices = list(np.argsort(scores))
+         high_score_indices = indices[::-1][: int(len(indices) * ratio_channels_to_ablate)]
+         low_score_indices = indices[: int(len(indices) * ratio_channels_to_ablate)]
+         self.indices = np.int32(high_score_indices + low_score_indices)
+         return self.indices
+
+     def set_next_batch(
+             self,
+             input_batch_index,
+             activations,
+             num_channels_to_ablate):
+         """ This creates the next batch of activations from the layer.
+             Just take the corresponding batch member from activations, and repeat it num_channels_to_ablate times.
+         """
+         self.activations = activations[input_batch_index, :, :, :].clone(
+         ).unsqueeze(0).repeat(num_channels_to_ablate, 1, 1, 1)
+
+     def __call__(self, x):
+         output = self.activations
+         for i in range(output.size(0)):
+             # Commonly the minimum activation will be 0,
+             # and then it makes sense to zero it out.
+             # However, depending on the architecture,
+             # if the values can be negative we use a very negative value
+             # to perform the ablation, deviating from the paper.
+             if torch.min(output) == 0:
+                 output[i, self.indices[i], :] = 0
+             else:
+                 ABLATION_VALUE = 1e7
+                 output[i, self.indices[i], :] = torch.min(
+                     output) - ABLATION_VALUE
+
+         return output
+
+
+ class AblationLayerVit(AblationLayer):
+     def __init__(self):
+         super(AblationLayerVit, self).__init__()
+
+     def __call__(self, x):
+         output = self.activations
+         output = output.transpose(1, len(output.shape) - 1)
+         for i in range(output.size(0)):
+             # Commonly the minimum activation will be 0,
+             # and then it makes sense to zero it out.
+             # However, depending on the architecture,
+             # if the values can be negative we use a very negative value
+             # to perform the ablation, deviating from the paper.
+             if torch.min(output) == 0:
+                 output[i, self.indices[i], :] = 0
+             else:
+                 ABLATION_VALUE = 1e7
+                 output[i, self.indices[i], :] = torch.min(
+                     output) - ABLATION_VALUE
+
+         output = output.transpose(len(output.shape) - 1, 1)
+
+         return output
+
+     def set_next_batch(
+             self,
+             input_batch_index,
+             activations,
+             num_channels_to_ablate):
+         """ This creates the next batch of activations from the layer.
+             Just take the corresponding batch member from activations, and repeat it num_channels_to_ablate times.
+         """
+         repeat_params = [num_channels_to_ablate] + \
+             len(activations.shape[:-1]) * [1]
+         self.activations = activations[input_batch_index, :, :].clone(
+         ).unsqueeze(0).repeat(*repeat_params)
+
+
+ class AblationLayerFasterRCNN(AblationLayer):
+     def __init__(self):
+         super(AblationLayerFasterRCNN, self).__init__()
+
+     def set_next_batch(
+             self,
+             input_batch_index,
+             activations,
+             num_channels_to_ablate):
+         """ Extract the next batch member from activations,
+             and repeat it num_channels_to_ablate times.
+         """
+         self.activations = OrderedDict()
+         for key, value in activations.items():
+             fpn_activation = value[input_batch_index,
+                                    :, :, :].clone().unsqueeze(0)
+             self.activations[key] = fpn_activation.repeat(
+                 num_channels_to_ablate, 1, 1, 1)
+
+     def __call__(self, x):
+         result = self.activations
+         layers = {0: '0', 1: '1', 2: '2', 3: '3', 4: 'pool'}
+         num_channels_to_ablate = result['pool'].size(0)
+         for i in range(num_channels_to_ablate):
+             pyramid_layer = int(self.indices[i] / 256)
+             index_in_pyramid_layer = int(self.indices[i] % 256)
+             result[layers[pyramid_layer]][i,
+                                           index_in_pyramid_layer, :, :] = -1000
+         return result
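A quick check (illustrative, not from the committed files) of how AblationLayer behaves in the common post-ReLU case where the minimum activation is 0, so ablated channels are zeroed out; the channel indices below are arbitrary.

    # Minimal sketch: repeat one sample 3 times and ablate one channel per repeat.
    import torch
    from pytorch_grad_cam.ablation_layer import AblationLayer

    layer = AblationLayer()
    activations = torch.relu(torch.randn(2, 8, 7, 7))          # batch of 2, 8 channels, min is 0
    layer.set_next_batch(input_batch_index=0, activations=activations,
                         num_channels_to_ablate=3)              # sample 0 repeated 3 times
    layer.indices = [1, 4, 6]                                   # channel to ablate in each repeat
    out = layer(None)                                           # input is ignored; stored activations are returned
    print(out.shape, float(out[0, 1].abs().sum()))              # torch.Size([3, 8, 7, 7]) 0.0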
pytorch_grad_cam/activations_and_gradients.py ADDED
@@ -0,0 +1,46 @@
+ class ActivationsAndGradients:
+     """ Class for extracting activations and
+     registering gradients from targeted intermediate layers """
+
+     def __init__(self, model, target_layers, reshape_transform):
+         self.model = model
+         self.gradients = []
+         self.activations = []
+         self.reshape_transform = reshape_transform
+         self.handles = []
+         for target_layer in target_layers:
+             self.handles.append(
+                 target_layer.register_forward_hook(self.save_activation))
+             # Because of https://github.com/pytorch/pytorch/issues/61519,
+             # we don't use a backward hook to record gradients.
+             self.handles.append(
+                 target_layer.register_forward_hook(self.save_gradient))
+
+     def save_activation(self, module, input, output):
+         activation = output
+
+         if self.reshape_transform is not None:
+             activation = self.reshape_transform(activation)
+         self.activations.append(activation.cpu().detach())
+
+     def save_gradient(self, module, input, output):
+         if not hasattr(output, "requires_grad") or not output.requires_grad:
+             # You can only register hooks on tensors that require grad.
+             return
+
+         # Gradients are computed in reverse order
+         def _store_grad(grad):
+             if self.reshape_transform is not None:
+                 grad = self.reshape_transform(grad)
+             self.gradients = [grad.cpu().detach()] + self.gradients
+
+         output.register_hook(_store_grad)
+
+     def __call__(self, x):
+         self.gradients = []
+         self.activations = []
+         return self.model(x)
+
+     def release(self):
+         for handle in self.handles:
+             handle.remove()
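A minimal sketch (not part of this commit) of how ActivationsAndGradients is meant to be used: wrap a model, run a forward pass, backpropagate a scalar, then read the captured activations and gradients. The toy CNN below is purely illustrative.

    import torch
    from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients

    model = torch.nn.Sequential(
        torch.nn.Conv2d(3, 4, 3, padding=1),
        torch.nn.ReLU(),
        torch.nn.AdaptiveAvgPool2d(1),
        torch.nn.Flatten(),
        torch.nn.Linear(4, 2),
    )
    wrapper = ActivationsAndGradients(model, target_layers=[model[0]], reshape_transform=None)
    out = wrapper(torch.randn(1, 3, 8, 8))          # forward hooks capture the conv activations
    out[:, 0].sum().backward()                      # tensor hook captures the conv gradients
    print(wrapper.activations[0].shape, wrapper.gradients[0].shape)   # both (1, 4, 8, 8)
    wrapper.release()                               # remove the hooks when done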
pytorch_grad_cam/base_cam.py ADDED
@@ -0,0 +1,205 @@
+ import numpy as np
+ import torch
+ import ttach as tta
+ from typing import Callable, List, Tuple
+ from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+ from pytorch_grad_cam.utils.image import scale_cam_image
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+
+
+ class BaseCAM:
+     def __init__(self,
+                  model: torch.nn.Module,
+                  target_layers: List[torch.nn.Module],
+                  use_cuda: bool = False,
+                  reshape_transform: Callable = None,
+                  compute_input_gradient: bool = False,
+                  uses_gradients: bool = True) -> None:
+         self.model = model.eval()
+         self.target_layers = target_layers
+         self.cuda = use_cuda
+         if self.cuda:
+             self.model = model.cuda()
+         self.reshape_transform = reshape_transform
+         self.compute_input_gradient = compute_input_gradient
+         self.uses_gradients = uses_gradients
+         self.activations_and_grads = ActivationsAndGradients(
+             self.model, target_layers, reshape_transform)
+
+     """ Get a vector of weights for every channel in the target layer.
+         Methods that return channel weights
+         will typically need to only implement this function. """
+
+     def get_cam_weights(self,
+                         input_tensor: torch.Tensor,
+                         target_layers: List[torch.nn.Module],
+                         targets: List[torch.nn.Module],
+                         activations: torch.Tensor,
+                         grads: torch.Tensor) -> np.ndarray:
+         raise Exception("Not Implemented")
+
+     def get_cam_image(self,
+                       input_tensor: torch.Tensor,
+                       target_layer: torch.nn.Module,
+                       targets: List[torch.nn.Module],
+                       activations: torch.Tensor,
+                       grads: torch.Tensor,
+                       eigen_smooth: bool = False) -> np.ndarray:
+
+         weights = self.get_cam_weights(input_tensor,
+                                        target_layer,
+                                        targets,
+                                        activations,
+                                        grads)
+         weighted_activations = weights[:, :, None, None] * activations
+         if eigen_smooth:
+             cam = get_2d_projection(weighted_activations)
+         else:
+             cam = weighted_activations.sum(axis=1)
+         return cam
+
+     def forward(self,
+                 input_tensor: torch.Tensor,
+                 targets: List[torch.nn.Module],
+                 eigen_smooth: bool = False) -> np.ndarray:
+
+         if self.cuda:
+             input_tensor = input_tensor.cuda()
+
+         if self.compute_input_gradient:
+             input_tensor = torch.autograd.Variable(input_tensor,
+                                                    requires_grad=True)
+
+         outputs = self.activations_and_grads(input_tensor)
+         # Only for ViT-GPT2 or any other VisionEncoderDecoder model:
+         # use the pooled encoder output as the score vector.
+         outputs = outputs.pooler_output
+         if targets is None:
+             target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1)
+             targets = [ClassifierOutputTarget(
+                 category) for category in target_categories]
+
+         if self.uses_gradients:
+             self.model.zero_grad()
+             loss = sum([target(output)
+                         for target, output in zip(targets, outputs)])
+             loss.backward(retain_graph=True)
+
+         # In most of the saliency attribution papers, the saliency is
+         # computed with a single target layer.
+         # Commonly it is the last convolutional layer.
+         # Here we support passing a list with multiple target layers.
+         # It will compute the saliency image for every layer,
+         # and then aggregate them (with a default mean aggregation).
+         # This gives you more flexibility in case you just want to
+         # use all conv layers, for example, or all Batchnorm layers,
+         # or something else.
+         cam_per_layer = self.compute_cam_per_layer(input_tensor,
+                                                    targets,
+                                                    eigen_smooth)
+         return self.aggregate_multi_layers(cam_per_layer)
+
+     def get_target_width_height(self,
+                                 input_tensor: torch.Tensor) -> Tuple[int, int]:
+         width, height = input_tensor.size(-1), input_tensor.size(-2)
+         return width, height
+
+     def compute_cam_per_layer(
+             self,
+             input_tensor: torch.Tensor,
+             targets: List[torch.nn.Module],
+             eigen_smooth: bool) -> np.ndarray:
+         activations_list = [a.cpu().data.numpy()
+                             for a in self.activations_and_grads.activations]
+         grads_list = [g.cpu().data.numpy()
+                       for g in self.activations_and_grads.gradients]
+         target_size = self.get_target_width_height(input_tensor)
+
+         cam_per_target_layer = []
+         # Loop over the saliency image from every layer
+         for i in range(len(self.target_layers)):
+             target_layer = self.target_layers[i]
+             layer_activations = None
+             layer_grads = None
+             if i < len(activations_list):
+                 layer_activations = activations_list[i]
+             if i < len(grads_list):
+                 layer_grads = grads_list[i]
+
+             cam = self.get_cam_image(input_tensor,
+                                      target_layer,
+                                      targets,
+                                      layer_activations,
+                                      layer_grads,
+                                      eigen_smooth)
+             cam = np.maximum(cam, 0)
+             scaled = scale_cam_image(cam, target_size)
+             cam_per_target_layer.append(scaled[:, None, :])
+
+         return cam_per_target_layer
+
+     def aggregate_multi_layers(
+             self,
+             cam_per_target_layer: np.ndarray) -> np.ndarray:
+         cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
+         cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
+         result = np.mean(cam_per_target_layer, axis=1)
+         return scale_cam_image(result)
+
+     def forward_augmentation_smoothing(self,
+                                        input_tensor: torch.Tensor,
+                                        targets: List[torch.nn.Module],
+                                        eigen_smooth: bool = False) -> np.ndarray:
+         transforms = tta.Compose(
+             [
+                 tta.HorizontalFlip(),
+                 tta.Multiply(factors=[0.9, 1, 1.1]),
+             ]
+         )
+         cams = []
+         for transform in transforms:
+             augmented_tensor = transform.augment_image(input_tensor)
+             cam = self.forward(augmented_tensor,
+                                targets,
+                                eigen_smooth)
+
+             # The ttach library expects a tensor of size BxCxHxW
+             cam = cam[:, None, :, :]
+             cam = torch.from_numpy(cam)
+             cam = transform.deaugment_mask(cam)
+
+             # Back to numpy float32, HxW
+             cam = cam.numpy()
+             cam = cam[:, 0, :, :]
+             cams.append(cam)
+
+         cam = np.mean(np.float32(cams), axis=0)
+         return cam
+
+     def __call__(self,
+                  input_tensor: torch.Tensor,
+                  targets: List[torch.nn.Module] = None,
+                  aug_smooth: bool = False,
+                  eigen_smooth: bool = False) -> np.ndarray:
+
+         # Smooth the CAM result with test time augmentation
+         if aug_smooth is True:
+             return self.forward_augmentation_smoothing(
+                 input_tensor, targets, eigen_smooth)
+
+         return self.forward(input_tensor,
+                             targets, eigen_smooth)
+
+     def __del__(self):
+         self.activations_and_grads.release()
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, exc_tb):
+         self.activations_and_grads.release()
+         if isinstance(exc_value, IndexError):
+             # Handle IndexError here...
+             print(
+                 f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}")
+             return True
+
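The pooler_output line above is this commit's VisionEncoderDecoder-specific change: forward() assumes the wrapped model returns a Hugging Face style output that exposes a pooled vector. A hedged sketch of how a BaseCAM subclass such as GradCAM could be driven against a plain ViT encoder under that assumption; the checkpoint name, target layer, and 14x14 token grid are illustrative, and GradCAM is assumed to be exported by the package __init__ as in the upstream library.

    import torch
    from transformers import ViTModel
    from pytorch_grad_cam import GradCAM

    vit = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")

    def reshape_transform(tensor, height=14, width=14):
        # Drop the CLS token and fold the patch tokens back into a 2D grid (B, C, H, W).
        result = tensor[:, 1:, :].reshape(tensor.size(0), height, width, tensor.size(2))
        return result.transpose(2, 3).transpose(1, 2)

    cam = GradCAM(model=vit,
                  target_layers=[vit.encoder.layer[-1].layernorm_before],
                  reshape_transform=reshape_transform)
    grayscale_cam = cam(input_tensor=torch.randn(1, 3, 224, 224))   # stand-in for pixel_values
    print(grayscale_cam.shape)                                      # (1, 224, 224)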
pytorch_grad_cam/cam_mult_image.py ADDED
@@ -0,0 +1,37 @@
+ import torch
+ import numpy as np
+ from typing import List, Callable
+ from pytorch_grad_cam.metrics.perturbation_confidence import PerturbationConfidenceMetric
+
+
+ def multiply_tensor_with_cam(input_tensor: torch.Tensor,
+                              cam: torch.Tensor):
+     """ Multiply an input tensor (after normalization)
+         with a pixel attribution map
+     """
+     return input_tensor * cam
+
+
+ class CamMultImageConfidenceChange(PerturbationConfidenceMetric):
+     def __init__(self):
+         super(CamMultImageConfidenceChange,
+               self).__init__(multiply_tensor_with_cam)
+
+
+ class DropInConfidence(CamMultImageConfidenceChange):
+     def __init__(self):
+         super(DropInConfidence, self).__init__()
+
+     def __call__(self, *args, **kwargs):
+         scores = super(DropInConfidence, self).__call__(*args, **kwargs)
+         scores = -scores
+         return np.maximum(scores, 0)
+
+
+ class IncreaseInConfidence(CamMultImageConfidenceChange):
+     def __init__(self):
+         super(IncreaseInConfidence, self).__init__()
+
+     def __call__(self, *args, **kwargs):
+         scores = super(IncreaseInConfidence, self).__call__(*args, **kwargs)
+         return np.float32(scores > 0)
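A toy simplification (not from the committed files) of the confidence-change logic: treating the base metric's output as "score after multiplying the image by the CAM, minus the original score", DropInConfidence keeps only positive drops and IncreaseInConfidence flags increases.

    import numpy as np

    original = np.float32([0.90, 0.40, 0.75])
    after_cam_mult = np.float32([0.60, 0.45, 0.75])
    change = after_cam_mult - original          # simplified stand-in for the base metric's scores
    drop = np.maximum(-change, 0)               # DropInConfidence
    increase = np.float32(change > 0)           # IncreaseInConfidence
    print(drop, increase)                       # [0.3 0.  0. ] [0. 1. 0.]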
pytorch_grad_cam/eigen_cam.py ADDED
@@ -0,0 +1,23 @@
+ from pytorch_grad_cam.base_cam import BaseCAM
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+
+ # https://arxiv.org/abs/2008.00299
+
+
+ class EigenCAM(BaseCAM):
+     def __init__(self, model, target_layers, use_cuda=False,
+                  reshape_transform=None):
+         super(EigenCAM, self).__init__(model,
+                                        target_layers,
+                                        use_cuda,
+                                        reshape_transform,
+                                        uses_gradients=False)
+
+     def get_cam_image(self,
+                       input_tensor,
+                       target_layer,
+                       target_category,
+                       activations,
+                       grads,
+                       eigen_smooth):
+         return get_2d_projection(activations)
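For intuition, a rough standalone equivalent (illustrative only) of what get_2d_projection computes for one image: reshape the activations to (H*W, C), center them, and project onto the first right singular vector to get an H x W map.

    import numpy as np

    activations = np.random.rand(1, 64, 7, 7).astype(np.float32)
    a = activations[0].reshape(64, -1).transpose()          # (H*W, C)
    a = a - a.mean(axis=0)                                  # center before the SVD
    _, _, vt = np.linalg.svd(a, full_matrices=True)
    projection = (a @ vt[0]).reshape(7, 7)                  # first principal component per location
    print(projection.shape)                                 # (7, 7)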
pytorch_grad_cam/eigen_grad_cam.py ADDED
@@ -0,0 +1,21 @@
+ from pytorch_grad_cam.base_cam import BaseCAM
+ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
+
+ # Like EigenCAM: https://arxiv.org/abs/2008.00299
+ # But multiply the activations x gradients
+
+
+ class EigenGradCAM(BaseCAM):
+     def __init__(self, model, target_layers, use_cuda=False,
+                  reshape_transform=None):
+         super(EigenGradCAM, self).__init__(model, target_layers, use_cuda,
+                                            reshape_transform)
+
+     def get_cam_image(self,
+                       input_tensor,
+                       target_layer,
+                       target_category,
+                       activations,
+                       grads,
+                       eigen_smooth):
+         return get_2d_projection(grads * activations)
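A side-by-side sketch (illustrative, not part of the commit) of the only difference from EigenCAM: the projection runs on grads * activations instead of the raw activations, so channels are weighted by how strongly the target score reacts to them.

    import numpy as np
    from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection

    activations = np.random.rand(1, 64, 7, 7).astype(np.float32)
    grads = np.random.rand(1, 64, 7, 7).astype(np.float32)
    eigen_cam_map = get_2d_projection(activations)              # what EigenCAM.get_cam_image returns
    eigen_grad_cam_map = get_2d_projection(grads * activations) # what EigenGradCAM.get_cam_image returns
    print(eigen_cam_map.shape, eigen_grad_cam_map.shape)        # (1, 7, 7) (1, 7, 7)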
pytorch_grad_cam/feature_factorization/__init__.py ADDED
File without changes
pytorch_grad_cam/feature_factorization/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (180 Bytes). View file
 
pytorch_grad_cam/feature_factorization/__pycache__/deep_feature_factorization.cpython-39.pyc ADDED
Binary file (4.75 kB). View file