Commit a697138 by ga89tiy
Parent: 56f4e99

Commit message: fix

Files changed:
- LLAVA_Biovil/biovil_t/encoder.py (+1, -2)
- LLAVA_Biovil/biovil_t/model.py (+1, -2)
- __pycache__/utils.cpython-310.pyc (binary, added)
- findings_classifier/__pycache__/__init__.cpython-310.pyc (binary, added)
- findings_classifier/__pycache__/chexpert_dataset.cpython-310.pyc (binary, added)
- findings_classifier/__pycache__/chexpert_model.cpython-310.pyc (binary, added)
- findings_classifier/__pycache__/chexpert_train.cpython-310.pyc (binary, added)
- simple_test.py (+25, -13)
- utils.py (+20, -0)
LLAVA_Biovil/biovil_t/encoder.py
CHANGED
@@ -10,7 +10,6 @@
 
 import torch
 import torch.nn as nn
-from health_multimodal.common.device import get_module_device
 from timm.models.layers import trunc_normal_
 
 from .resnet import resnet18, resnet50
@@ -97,7 +96,7 @@ def __init__(self, img_encoder_type: str):
         output_dim = 256  # The aggregate feature dim of the encoder is `2 * output_dim` i.e. [f_static, f_diff]
         grid_shape = (14, 14)  # Spatial dimensions of patch grid.
 
-        backbone_output_feature_dim = get_encoder_output_dim(self.encoder, device=get_module_device(self.encoder))
+        backbone_output_feature_dim = get_encoder_output_dim(self.encoder, device=torch.device("cuda"))
 
         self.backbone_to_vit = nn.Conv2d(in_channels=backbone_output_feature_dim, out_channels=output_dim,
                                          kernel_size=1, stride=1, padding=0, bias=False)
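The second hunk pins the probe device to `torch.device("cuda")` instead of reading it from the module, so measuring the backbone's output width now requires a GPU and will raise on a CPU-only host. Below is a minimal sketch of what such a probe typically does, with a guarded device choice; `probe_output_dim` is a hypothetical stand-in for `get_encoder_output_dim`, and the 448x448 input size is an assumption borrowed from the crop size used elsewhere in this commit.

```python
import torch
import torch.nn as nn

def probe_output_dim(encoder: nn.Module, device: torch.device) -> int:
    # Hypothetical stand-in for get_encoder_output_dim: push one dummy
    # image through the encoder and read off the channel dimension.
    encoder = encoder.to(device)
    with torch.no_grad():
        dummy = torch.zeros(1, 3, 448, 448, device=device)  # input size is an assumption
        features = encoder(dummy)
    return features.shape[1]

# Guarded device choice instead of the hardcoded "cuda":
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(probe_output_dim(nn.Conv2d(3, 64, kernel_size=3), device))  # prints 64
```

Probing with a dummy forward pass avoids hardcoding per-backbone feature widths (512 for resnet18, 2048 for resnet50), which is presumably why the removed code resolved the device dynamically rather than assuming one.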
LLAVA_Biovil/biovil_t/model.py
CHANGED
@@ -12,7 +12,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from health_multimodal.common.device import get_module_device
 
 from .encoder import get_encoder_from_type, get_encoder_output_dim, MultiImageEncoder
 from .modules import MLP, MultiTaskModel
@@ -43,7 +42,7 @@ def __init__(self,
 
         # Initiate encoder, projector, and classifier
         self.encoder = get_encoder_from_type(img_encoder_type)
-        self.feature_size = get_encoder_output_dim(self.encoder, device=get_module_device(self.encoder))
+        self.feature_size = get_encoder_output_dim(self.encoder, device=torch.device("cuda"))
         self.projector = MLP(input_dim=self.feature_size, output_dim=joint_feature_size,
                              hidden_dim=joint_feature_size, use_1x1_convs=True)
         self.downstream_classifier_kwargs = downstream_classifier_kwargs
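The same pair of edits as in encoder.py: the `health_multimodal` import goes away and the probe device is pinned to CUDA. Should the dynamic lookup ever be wanted back without the extra dependency, the helper reduces to a one-liner over the module's parameters. This is a sketch of the standard pattern, not necessarily the exact implementation the removed import provided:

```python
import torch
import torch.nn as nn

def get_module_device(module: nn.Module) -> torch.device:
    # Device of the first parameter; assumes the module has parameters
    # and that they all live on the same device.
    return next(module.parameters()).device

layer = nn.Linear(8, 4)
assert get_module_device(layer) == torch.device("cpu")
```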
__pycache__/utils.cpython-310.pyc
ADDED
Binary file (3.69 kB)

findings_classifier/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (183 Bytes)

findings_classifier/__pycache__/chexpert_dataset.cpython-310.pyc
ADDED
Binary file (5.95 kB)

findings_classifier/__pycache__/chexpert_model.cpython-310.pyc
ADDED
Binary file (1.09 kB)

findings_classifier/__pycache__/chexpert_train.cpython-310.pyc
ADDED
Binary file (11 kB)
simple_test.py
CHANGED
@@ -1,6 +1,5 @@
 from pathlib import Path
 
-from skimage import io as io_img
 import io
 
 import requests
@@ -14,26 +13,45 @@ from LLAVA_Biovil.llava.model.builder import load_pretrained_model
 from LLAVA_Biovil.llava.conversation import SeparatorStyle, conv_vicuna_v1
 
 from LLAVA_Biovil.llava.constants import IMAGE_TOKEN_INDEX
-from utils import create_chest_xray_transform_for_inference
+from utils import create_chest_xray_transform_for_inference, init_chexpert_predictor
 
-
+
+def load_model_from_huggingface(repo_id):
     # Download model files
-    model_path = snapshot_download(repo_id=repo_id, revision="main")
-    model_path = Path(model_path)
+    model_path = snapshot_download(repo_id=repo_id, revision="main", force_download=True)
+    model_path = Path(model_path)
 
     tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base='liuhaotian/llava-v1.5-7b',
                                                                            model_name="llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v5_checkpoint-21000", load_8bit=False, load_4bit=False)
 
+
     return tokenizer, model, image_processor, context_len
 
+
+
 if __name__ == '__main__':
     # config = None
     # model_path = "/home/guests/chantal_pellegrini/RaDialog_LLaVA/LLAVA/checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v5/checkpoint-21000" #TODO hardcoded in huggingface repo probably
     # model_name = get_model_name_from_path(model_path)
-
+    sample_img_path = "https://openi.nlm.nih.gov/imgs/512/10/10/CXR10_IM-0002-2001.png?keywords=Calcified%20Granuloma" #TODO find good image
+
+    response = requests.get(sample_img_path)
+    image = Image.open(io.BytesIO(response.content))
+    image = remap_to_uint8(np.array(image))
+    image = Image.fromarray(image).convert("L")
+
+    tokenizer, model, image_processor, context_len = load_model_from_huggingface(repo_id="Chantal/RaDialog-interactive-radiology-report-generation")
+    cp_model, cp_class_names, cp_transforms = init_chexpert_predictor()
+
     model.config.tokenizer_padding_side = "left"
 
-
+    cp_image = cp_transforms(image)
+    logits = cp_model(cp_image[None].half().cuda())
+    preds_probs = torch.sigmoid(logits)
+    preds = preds_probs > 0.5
+    pred = preds[0].cpu().numpy()
+    findings = cp_class_names[pred].tolist()
+    findings = ', '.join(findings).lower().strip()
 
     conv = conv_vicuna_v1.copy()
     REPORT_GEN_PROMPT = f"<image>. Predicted Findings: {findings}. You are to act as a radiologist and write the finding section of a chest x-ray radiology report for this X-ray image and the given predicted findings. Write in the style of a radiologist, write one fluent text without enumeration, be concise and don't provide explanations or reasons."
@@ -44,12 +62,6 @@ if __name__ == '__main__':
 
     # get the image
     vis_transforms_biovil = create_chest_xray_transform_for_inference(512, center_crop_size=448)
-    sample_img_path = "https://openi.nlm.nih.gov/imgs/512/10/10/CXR10_IM-0002-2001.png?keywords=Calcified%20Granuloma" #TODO find good image
-
-    response = requests.get(sample_img_path)
-    image = Image.open(io.BytesIO(response.content))
-    image = remap_to_uint8(np.array(image))
-    image = Image.fromarray(image).convert("L")
     image_tensor = vis_transforms_biovil(image).unsqueeze(0)
 
     image_tensor = image_tensor.to(model.device, dtype=torch.bfloat16)
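The second hunk restores the `def load_model_from_huggingface(repo_id):` header above the previously orphaned, indented download code, which matches the commit message "fix". The `__main__` block now also runs CheXpert findings prediction before report generation: each of the 14 classes gets an independent sigmoid probability, thresholded at 0.5, and the resulting boolean mask selects class names. A self-contained sketch of that selection step, with made-up logits over a subset of the labels:

```python
import numpy as np
import torch

# Made-up logits for 5 of the 14 CheXpert classes.
class_names = np.asarray(["No Finding", "Cardiomegaly", "Edema", "Pneumonia", "Support Devices"])
logits = torch.tensor([[-2.3, 1.7, 0.2, -0.9, 3.1]])

preds = torch.sigmoid(logits) > 0.5  # independent per-class thresholds (multi-label)
mask = preds[0].numpy()              # boolean mask over the class axis
findings = ", ".join(class_names[mask].tolist()).lower().strip()
print(findings)  # cardiomegaly, edema, support devices
```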
utils.py
CHANGED
@@ -2,6 +2,9 @@ import numpy as np
 import torch
 from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop, transforms
 
+from huggingface.findings_classifier.chexpert_train import LitIGClassifier
+
+
 class ExpandChannels:
     """
     Transforms an image with one channel to an image with three channels by copying
@@ -60,3 +63,20 @@ def remap_to_uint8(array: np.ndarray, percentiles=None) -> np.ndarray:
     array /= array.max()
     array *= 255
     return array.astype(np.uint8)
+
+def init_chexpert_predictor():
+    ckpt_path = f"findings_classifier/checkpoints/chexpert_train/ChexpertClassifier.ckpt"
+    chexpert_cols = ["No Finding", "Enlarged Cardiomediastinum",
+                     "Cardiomegaly", "Lung Opacity",
+                     "Lung Lesion", "Edema",
+                     "Consolidation", "Pneumonia",
+                     "Atelectasis", "Pneumothorax",
+                     "Pleural Effusion", "Pleural Other",
+                     "Fracture", "Support Devices"]
+    model = LitIGClassifier.load_from_checkpoint(ckpt_path, num_classes=14, class_names=chexpert_cols, strict=False)
+    model.eval()
+    model.cuda()
+    model.half()
+    cp_transforms = Compose([Resize(512), CenterCrop(488), ToTensor(), ExpandChannels()])
+
+    return model, np.asarray(model.class_names), cp_transforms
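For reference, a usage sketch of the new helper under its stated assumptions (a CUDA device, the checkpoint path hardcoded above, and a grayscale PIL input); the `.half()` cast on the input mirrors the `model.half()` call inside `init_chexpert_predictor`, and the file name is hypothetical:

```python
import torch
from PIL import Image

from utils import init_chexpert_predictor

cp_model, cp_class_names, cp_transforms = init_chexpert_predictor()

image = Image.open("example_cxr.png").convert("L")  # hypothetical local chest X-ray
cp_image = cp_transforms(image)                     # Resize(512), CenterCrop(488), 3-channel tensor
with torch.no_grad():
    logits = cp_model(cp_image[None].half().cuda())  # batch of one, fp16, on GPU
print(dict(zip(cp_class_names.tolist(), torch.sigmoid(logits)[0].tolist())))
```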