# Image-Segmentation-Various
#
# Running
#
# File size: 11,384 Bytes

import matplotlib.pyplot as plt
import requests, validators
import torch
import pathlib
import numpy as np
from PIL import Image

from transformers import DetrFeatureExtractor, DetrForSegmentation, MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
# from transformers.models.detr.feature_extraction_detr import rgb_to_id
from transformers.image_transforms import rgb_to_id

# Sample input image, opened from a path relative to the current working directory.
TEST_IMAGE = Image.open(r"images/9999999_00783_d_0000358.jpg")
# Checkpoint names for `from_pretrained`; only the MaskFormer one is selected below.
MODEL_NAME_DETR = "facebook/detr-resnet-50-panoptic"
MODEL_NAME_MASKFORMER = "facebook/maskformer-swin-large-coco"
# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#######
# Parameters
#######
# The image and checkpoint name that the rest of the script operates on.
image = TEST_IMAGE
model_name = MODEL_NAME_MASKFORMER

# Starting with MaskFormer

# Load the image processor that belongs to the selected checkpoint; it is used below both to
# build the model inputs and to post-process the model outputs.
processor = MaskFormerImageProcessor.from_pretrained(model_name) # <class 'transformers.models.maskformer.image_processing_maskformer.MaskFormerImageProcessor'>
# DIR() --> ['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', 
#           '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 
#           '__weakref__', '_auto_class', '_create_repo', '_get_files_timestamps', '_max_size', '_pad_image', '_preprocess', '_preprocess_image', '_preprocess_mask', '_processor_class', 
#           '_set_processor_class', '_upload_modified_files', 'center_crop', 'convert_segmentation_map_to_binary_masks', 'do_normalize', 'do_reduce_labels', 'do_rescale', 'do_resize', 
#           'encode_inputs', 'fetch_images', 'from_dict', 'from_json_file', 'from_pretrained', 'get_image_processor_dict', 'ignore_index', 'image_mean', 'image_std', 'model_input_names', 
#           'normalize', 'pad', 'post_process_instance_segmentation', 'post_process_panoptic_segmentation', 'post_process_segmentation', 'post_process_semantic_segmentation', 'preprocess', 
#           'push_to_hub', 'register_for_auto_class', 'resample', 'rescale', 'rescale_factor', 'resize', 'save_pretrained', 'size', 'size_divisor', 'to_dict', 'to_json_file', 'to_json_string']

# Load the segmentation model weights for the same checkpoint.
model = MaskFormerForInstanceSegmentation.from_pretrained(model_name) # <class 'transformers.models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentation'>
# DIR for model was too big
# Move the model's parameters to the selected device (GPU when available).
model.to(DEVICE)

# img = np.array(TEST_IMAGE)

# Turn the PIL image into model-ready PyTorch tensors.
inputs = processor(images=image, return_tensors="pt") # <class 'transformers.image_processing_utils.BatchFeature'>
# DIR() --> ['_MutableMapping__marker', '__abstractmethods__', '__class__', '__contains__', '__copy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', 
#           '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', 
#           '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__slots__', '__str__', 
#           '__subclasshook__', '__weakref__', '_abc_impl', '_get_is_as_tensor_fns', 'clear', 'convert_to_tensors', 'copy', 'data', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 
#           'setdefault', 'to', 'update', 'values']
# Move the input tensors to the same device as the model. `BatchFeature.to` returns the
# batch, so rebinding makes the device move explicit instead of relying on a side effect.
inputs = inputs.to(DEVICE)


# Inference only: run the forward pass without autograd so that no computation graph or
# intermediate activations are kept for a backward pass that never happens.
with torch.no_grad():
    outputs = model(**inputs) # <class 'transformers.models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput'>
# Each element of this class is a <class 'torch.Tensor'>
# DIR() --> ['__annotations__', '__class__', '__contains__', '__dataclass_fields__', '__dataclass_params__', '__delattr__', '__delitem__', '__dict__', '__dir__', 
#           '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', 
#           '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__post_init__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', 
#           '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'attentions', 'auxiliary_logits', 'class_queries_logits', 'clear', 'copy', 'encoder_hidden_states', 
#           'encoder_last_hidden_state', 'fromkeys', 'get', 'hidden_states', 'items', 'keys', 'loss', 'masks_queries_logits', 'move_to_end', 'pixel_decoder_hidden_states', 
#           'pixel_decoder_last_hidden_state', 'pop', 'popitem', 'setdefault', 'to_tuple', 'transformer_decoder_hidden_states', 'transformer_decoder_last_hidden_state', 
#           'update', 'values']

# PIL reports `image.size` as (width, height); reversing it gives the (height, width) pair
# passed as the target size. A single image was processed, so take the first (only) result.
results = processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
# <class 'dict'>
# Keys: dict_keys(['segmentation', 'segments_info'])
# type(results["segments_info"]) --> list
# type(results["segmentation"]) --> <class 'torch.Tensor'>


def show_mask_for_number(map_to_use, label_id):
    """
    Display the binary mask of one value of the segmentation map with matplotlib.

    map_to_use: You have to pass in `results["segmentation"]` (a torch.Tensor).
    label_id: Value to select in `map_to_use`. Pixels equal to it are drawn as
        255, all other pixels as 0.
        NOTE(review): for panoptic post-processing output the map is expected
        to hold the segment `id` values from `results["segments_info"]`, not
        the class `label_id`s -- confirm against the processor documentation.
    """
    # One path for every device: `.cpu()` is a no-op for tensors that already
    # live on the CPU, and `.detach()` keeps `.numpy()` working for tensors
    # that track gradients.
    mask = map_to_use.detach().cpu().numpy() == label_id

    # Scale the boolean mask to 0/255 so it can be shown as an 8-bit image.
    visual_mask = Image.fromarray((mask * 255).astype(np.uint8))
    plt.imshow(visual_mask)
    plt.show()


def get_coordinates_for_bb_simple(map_to_use, label_id):
    """
    Return the tight bounding box of all pixels equal to `label_id`.

    map_to_use: You have to pass in `results["segmentation"]` (a 2-D torch.Tensor).
    label_id: Value to look for in `map_to_use`.

    Returns two corners, `(min_0, min_1), (max_0, max_1)`, in array-index
    order (index along the first axis, then along the second axis). Both
    corners are inclusive and are NumPy integer scalars.

    Raises ValueError if no pixel of `map_to_use` equals `label_id`.
    """
    # One path for every device: `.cpu()` is a no-op for CPU tensors, and
    # `.detach()` keeps `.numpy()` working for tensors that track gradients.
    mask = map_to_use.detach().cpu().numpy() == label_id

    # Indices of the selected pixels, one array per axis.
    x, y = np.nonzero(mask)
    if x.size == 0:
        raise ValueError(
            f"label_id {label_id!r} does not occur in the segmentation map"
        )

    # Vectorized reductions instead of Python-level max()/min() over the arrays.
    return (x.min(), y.min()), (x.max(), y.max())

def make_simple_box(left_top, right_bottom, map_size):
    """
    Draw the outline of a box on an otherwise empty map and display it.

    left_top: First corner as (index along axis 0, index along axis 1), e.g.
        the first tuple returned by `get_coordinates_for_bb_simple`.
    right_bottom: Opposite corner in the same order; it is part of the box.
    map_size: Shape of the 2-D map to draw on.
    """
    full_mask = np.full(map_size, False)
    left_x, top_y = left_top
    right_x, bottom_y = right_bottom

    # Slice ends are exclusive, so `+ 1` is required for each edge to reach the
    # far corner. Without it the pixel at (right_x, bottom_y) is never set and a
    # box that is one row or one column thick loses edges entirely.
    full_mask[left_x:right_x + 1, top_y] = True
    full_mask[left_x:right_x + 1, bottom_y] = True
    full_mask[left_x, top_y:bottom_y + 1] = True
    full_mask[right_x, top_y:bottom_y + 1] = True

    # Scale the boolean outline to 0/255 so it can be shown as an 8-bit image.
    visual_mask = Image.fromarray((full_mask * 255).astype(np.uint8))
    plt.imshow(visual_mask)
    plt.show()


def test(map_to_use, label_id):
    """
    Display the mask of one value of the map with its bounding box outlined.

    The mask is drawn at full intensity (255) and the bounding-box outline at
    half intensity (127), overwriting mask pixels that lie on the outline.

    map_to_use: You have to pass in `results["segmentation"]` (a 2-D torch.Tensor).
    label_id: Value to select in `map_to_use`.

    Raises ValueError (from `get_coordinates_for_bb_simple`) if no pixel of
    `map_to_use` equals `label_id`.
    """
    # Work on a float copy of the mask. Writing 0.5 into a boolean array stores
    # True, which would render the outline at the same intensity as the mask.
    # `.cpu()` is a no-op for CPU tensors, so one path serves every device.
    mask = (map_to_use.detach().cpu().numpy() == label_id).astype(np.float32)

    lt, rb = get_coordinates_for_bb_simple(map_to_use, label_id)
    left_x, top_y = lt
    right_x, bottom_y = rb

    # Both corners are inclusive, and slice ends are exclusive, hence the `+ 1`
    # (otherwise the far corner pixel is left out of the outline).
    mask[left_x:right_x + 1, top_y] = .5
    mask[left_x:right_x + 1, bottom_y] = .5
    mask[left_x, top_y:bottom_y + 1] = .5
    mask[right_x, top_y:bottom_y + 1] = .5

    # 1.0 -> 255 (mask), 0.5 -> 127 (outline), 0.0 -> 0 (background).
    visual_mask = Image.fromarray((mask * 255).astype(np.uint8))
    plt.imshow(visual_mask)
    plt.show()



# From Tutorial (Box 79)
# def get_mask(segment_idx):
#     segment = results['segments_info'][segment_idx]
#     print("Visualizing mask for:", id2label[segment['label_id']])
#     mask = (predicted_panoptic_seg == segment['id'])
#     visual_mask = (mask * 255).astype(np.uint8)
#     return Image.fromarray(visual_mask)

# How to get ID

"""
>>> model.config.id2label
{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 
13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 
27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 
39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 
54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 
68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush', 80: 'banner', 81: 'blanket', 
82: 'bridge', 83: 'cardboard', 84: 'counter', 85: 'curtain', 86: 'door-stuff', 87: 'floor-wood', 88: 'flower', 89: 'fruit', 90: 'gravel', 91: 'house', 92: 'light', 93: 'mirror-stuff', 94: 'net', 95: 'pillow', 
96: 'platform', 97: 'playingfield', 98: 'railroad', 99: 'river', 100: 'road', 101: 'roof', 102: 'sand', 103: 'sea', 104: 'shelf', 105: 'snow', 106: 'stairs', 107: 'tent', 108: 'towel', 109: 'wall-brick', 
110: 'wall-stone', 111: 'wall-tile', 112: 'wall-wood', 113: 'water-other', 114: 'window-blind', 115: 'window-other', 116: 'tree-merged', 117: 'fence-merged', 118: 'ceiling-merged', 119: 'sky-other-merged', 
120: 'cabinet-merged', 121: 'table-merged', 122: 'floor-other-merged', 123: 'pavement-merged', 124: 'mountain-merged', 125: 'grass-merged', 126: 'dirt-merged', 127: 'paper-merged', 128: 'food-other-merged', 
129: 'building-other-merged', 130: 'rock-merged', 131: 'wall-other-merged', 132: 'rug-merged'}
>>> model.config.id2label[123]
'pavement-merged'
>>> results["segments_info"][1]
{'id': 2, 'label_id': 123, 'was_fused': False, 'score': 0.995813}
""" 
# The label IDs above do not seem to line up with the official COCO-Stuff label list: https://github.com/nightrome/cocostuff/blob/master/labels.md
# The most helpful reference so far was this notebook: https://github.com/NielsRogge/Transformers-Tutorials/blob/master/MaskFormer/Inference/Inference_with_MaskFormer_for_semantic_%2B_panoptic_segmentation.ipynb

"""
>>> Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8))
<PIL.Image.Image image mode=L size=2000x1500 at 0x7F07773691C0>
>>> temp = Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8))
"""

"""
>>> mask = (results["segmentation"].cpu().numpy == 4)
>>> mask = (results["segmentation"].cpu().numpy() == 4)
>>> mask
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
>>> visual_mask = (mask * 255).astype(np.uint8)
>>> visual_mask = Image.fromarray(visual_mask)
>>> plt.imshow(visual_mask)
<matplotlib.image.AxesImage object at 0x7f0761e78040>
>>> plt.show()
"""

"""
>>> mask = (results["segmentation"].cpu().numpy() == 1)
>>> visual_mask = (mask*255).astype(np.uint8)
>>> visual_mask = Image.fromarray(visual_mask)
>>> plt.imshow(visual_mask)
<matplotlib.image.AxesImage object at 0x7f0760298550>
>>> plt.show()
>>> results["segments_info"][0]
{'id': 1, 'label_id': 25, 'was_fused': False, 'score': 0.998022}
>>> 
"""

"""
>>> np.where(mask==True)
(array([300, 300, 300, ..., 392, 392, 392]), array([452, 453, 454, ..., 473, 474, 475]))
>>> max(np.where(mask==True)[0])
392
>>> min(np.where(mask==True)[0])
300
>>> max(np.where(mask==True)[1])
538
>>> min(np.where(mask==True)[1])
399
"""