# Image-Segmentation-Various
#
# Status: Running
#
# File size: 13,610 Bytes

import matplotlib.pyplot as plt
import requests, validators
import torch
import pathlib
import numpy as np
from PIL import Image
import cv2 as cv

from transformers import DetrFeatureExtractor, DetrForSegmentation, MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
# from transformers.models.detr.feature_extraction_detr import rgb_to_id
from transformers.image_transforms import rgb_to_id

# Sample image, opened as soon as this module is executed. The path is relative,
# so it is resolved against the current working directory.
TEST_IMAGE = Image.open(r"images/9999999_00783_d_0000358.jpg")
# Model checkpoint names; only the MaskFormer one is used in the code below.
MODEL_NAME_DETR = "facebook/detr-resnet-50-panoptic"
MODEL_NAME_MASKFORMER = "facebook/maskformer-swin-large-coco"
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#######
# Parameters
#######
# Image and checkpoint consumed by the pipeline below.
image = TEST_IMAGE
model_name = MODEL_NAME_MASKFORMER

# Starting with MaskFormer

# Image processor: used below both to build the model inputs from the image
# and to post-process the model outputs into a panoptic segmentation.
processor = MaskFormerImageProcessor.from_pretrained(model_name) # <class 'transformers.models.maskformer.image_processing_maskformer.MaskFormerImageProcessor'>
# DIR() --> ['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', 
#           '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 
#           '__weakref__', '_auto_class', '_create_repo', '_get_files_timestamps', '_max_size', '_pad_image', '_preprocess', '_preprocess_image', '_preprocess_mask', '_processor_class', 
#           '_set_processor_class', '_upload_modified_files', 'center_crop', 'convert_segmentation_map_to_binary_masks', 'do_normalize', 'do_reduce_labels', 'do_rescale', 'do_resize', 
#           'encode_inputs', 'fetch_images', 'from_dict', 'from_json_file', 'from_pretrained', 'get_image_processor_dict', 'ignore_index', 'image_mean', 'image_std', 'model_input_names', 
#           'normalize', 'pad', 'post_process_instance_segmentation', 'post_process_panoptic_segmentation', 'post_process_segmentation', 'post_process_semantic_segmentation', 'preprocess', 
#           'push_to_hub', 'register_for_auto_class', 'resample', 'rescale', 'rescale_factor', 'resize', 'save_pretrained', 'size', 'size_divisor', 'to_dict', 'to_json_file', 'to_json_string']

# Load the pretrained checkpoint selected by `model_name`.
model = MaskFormerForInstanceSegmentation.from_pretrained(model_name) # <class 'transformers.models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentation'>
# The dir(model) listing was too long to paste here.
# Put the model on DEVICE; the inputs are moved to the same device below.
model.to(DEVICE)

# img = np.array(TEST_IMAGE)

# Preprocess the image into PyTorch tensors ("pt") for the model.
inputs = processor(images=image, return_tensors="pt") # <class 'transformers.image_processing_utils.BatchFeature'>
# DIR() --> ['_MutableMapping__marker', '__abstractmethods__', '__class__', '__contains__', '__copy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', 
#           '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', 
#           '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__slots__', '__str__', 
#           '__subclasshook__', '__weakref__', '_abc_impl', '_get_is_as_tensor_fns', 'clear', 'convert_to_tensors', 'copy', 'data', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 
#           'setdefault', 'to', 'update', 'values']
# NOTE(review): the return value is discarded, so this relies on
# BatchFeature.to() moving the tensors in place -- confirm against the
# installed transformers version.
inputs.to(DEVICE)


# Inference only: run the forward pass without autograd so no gradient graph
# is recorded and its memory is not kept alive.
with torch.no_grad():
    outputs = model(**inputs) # <class 'transformers.models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput'>
# Each element of this class is a <class 'torch.Tensor'>
# DIR() --> ['__annotations__', '__class__', '__contains__', '__dataclass_fields__', '__dataclass_params__', '__delattr__', '__delitem__', '__dict__', '__dir__', 
#           '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', 
#           '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__post_init__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', 
#           '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'attentions', 'auxiliary_logits', 'class_queries_logits', 'clear', 'copy', 'encoder_hidden_states', 
#           'encoder_last_hidden_state', 'fromkeys', 'get', 'hidden_states', 'items', 'keys', 'loss', 'masks_queries_logits', 'move_to_end', 'pixel_decoder_hidden_states', 
#           'pixel_decoder_last_hidden_state', 'pop', 'popitem', 'setdefault', 'to_tuple', 'transformer_decoder_hidden_states', 'transformer_decoder_last_hidden_state', 
#           'update', 'values']

# Post-process the raw outputs into a panoptic segmentation.
# PIL's `image.size` is (width, height); it is reversed here to pass
# (height, width) as the target size. `[0]` takes the entry for the single
# image that was processed.
results = processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
# <class 'dict'>
# Keys: dict_keys(['segmentation', 'segments_info'])
# type(results["segments_info"]) --> list
# type(results["segmentation"]) --> <class 'torch.Tensor'>


def show_mask_for_number(map_to_use, label_id):
    """
    Display the binary mask of the pixels whose value equals `label_id`.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to select in the map; matching pixels are drawn as 255,
        all others as 0.
        NOTE(review): despite the name, the REPL notes in this file compare
        the map against `segments_info[i]['id']`, not `['label_id']` -- confirm.
    """
    # Copy to host memory only when CUDA is available on this machine.
    host_map = map_to_use.cpu() if torch.cuda.is_available() else map_to_use
    selected = host_map.numpy() == label_id

    # Scale the boolean mask to an 8-bit grayscale image and show it.
    mask_image = Image.fromarray((selected * 255).astype(np.uint8))
    plt.imshow(mask_image)
    plt.show()

def show_mask_for_number_over_image(map_to_use, label_id, image_object):
    """
    Display `image_object` with the binary mask for `label_id` overlaid on it.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to select in the map; matching pixels form the mask.
    image_object: image drawn underneath the mask (passed to `plt.imshow`).
    """
    # Copy to host memory only when CUDA is available on this machine.
    host_map = map_to_use.cpu() if torch.cuda.is_available() else map_to_use
    selected = host_map.numpy() == label_id
    overlay = Image.fromarray((selected * 255).astype(np.uint8))

    # Draw the image first, then the mask on top at 25% opacity.
    plt.imshow(image_object)
    plt.imshow(overlay, alpha=0.25)
    plt.show()


def get_coordinates_for_bb_simple(map_to_use, label_id):
    """
    Return the bounding box of all pixels in the map equal to `label_id`.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to look for in the map.

    Returns ((x_min, y_min), (x_max, y_max)); both corners are inclusive.
    Note that "x" is the index along the FIRST array axis (row) and "y" the
    index along the SECOND axis (column), i.e. array-index order rather than
    image (column, row) order.

    Raises ValueError when no pixel of the map equals `label_id`.
    """
    # `.cpu()` returns the tensor unchanged when it already lives on the CPU,
    # so no CUDA availability check is needed.
    mask = map_to_use.cpu().numpy() == label_id

    x, y = np.where(mask)
    if x.size == 0:
        raise ValueError(f"no pixel equals {label_id!r} in the given map")

    # NumPy reductions instead of the builtin max()/min(), which would walk
    # the index arrays one element at a time in Python.
    return (x.min(), y.min()), (x.max(), y.max())

def make_simple_box(left_top, right_bottom, map_size):
    """
    Draw the outline of a box on an otherwise empty map and display it.

    left_top: (x_min, y_min) corner of the box.
    right_bottom: (x_max, y_max) corner of the box (inclusive).
    map_size: shape of the boolean map to draw on.

    As in `get_coordinates_for_bb_simple`, the first coordinate ("x") indexes
    the first array axis and the second ("y") the second axis.
    """
    full_mask = np.full(map_size, False)
    left_x, top_y = left_top
    right_x, bottom_y = right_bottom

    # Both corners are inclusive, so every slice stops at `+ 1`; otherwise
    # the (right_x, bottom_y) corner pixel would be left out of the outline.
    full_mask[left_x:right_x + 1, top_y] = True
    full_mask[left_x:right_x + 1, bottom_y] = True
    full_mask[left_x, top_y:bottom_y + 1] = True
    full_mask[right_x, top_y:bottom_y + 1] = True

    # Scale the boolean outline to an 8-bit grayscale image and show it.
    visual_mask = (full_mask * 255).astype(np.uint8)
    visual_mask = Image.fromarray(visual_mask)
    plt.imshow(visual_mask)
    plt.show()


def test(map_to_use, label_id):
    """
    Display the mask for `label_id` with its bounding box drawn in mid-gray.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to select in the map.

    Mask pixels are shown at full intensity (255) and the bounding-box outline
    at half intensity (127), so the two can be told apart.

    Raises ValueError (from `get_coordinates_for_bb_simple`) when no pixel of
    the map equals `label_id`.
    """
    # `.cpu()` returns the tensor unchanged when it already lives on the CPU.
    # The mask is converted to float so that the 0.5 outline value survives:
    # written into a boolean array it would be cast to True and rendered at
    # the same intensity as the mask itself.
    mask = (map_to_use.cpu().numpy() == label_id).astype(np.float32)

    lt, rb = get_coordinates_for_bb_simple(map_to_use, label_id)
    left_x, top_y = lt
    right_x, bottom_y = rb

    # Both corners are inclusive, so every slice stops at `+ 1`; otherwise
    # the (right_x, bottom_y) corner pixel would be left out of the outline.
    mask[left_x:right_x + 1, top_y] = .5
    mask[left_x:right_x + 1, bottom_y] = .5
    mask[left_x, top_y:bottom_y + 1] = .5
    mask[right_x, top_y:bottom_y + 1] = .5

    # Scale to an 8-bit grayscale image (1.0 -> 255, 0.5 -> 127) and show it.
    visual_mask = (mask * 255).astype(np.uint8)
    visual_mask = Image.fromarray(visual_mask)
    plt.imshow(visual_mask)
    plt.show()



# From Tutorial (Box 79)
# def get_mask(segment_idx):
#     segment = results['segments_info'][segment_idx]
#     print("Visualizing mask for:", id2label[segment['label_id']])
#     mask = (predicted_panoptic_seg == segment['id'])
#     visual_mask = (mask * 255).astype(np.uint8)
#     return Image.fromarray(visual_mask)

# How to get ID

"""
>>> model.config.id2label
{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 
13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 
27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 
39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 
54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 
68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush', 80: 'banner', 81: 'blanket', 
82: 'bridge', 83: 'cardboard', 84: 'counter', 85: 'curtain', 86: 'door-stuff', 87: 'floor-wood', 88: 'flower', 89: 'fruit', 90: 'gravel', 91: 'house', 92: 'light', 93: 'mirror-stuff', 94: 'net', 95: 'pillow', 
96: 'platform', 97: 'playingfield', 98: 'railroad', 99: 'river', 100: 'road', 101: 'roof', 102: 'sand', 103: 'sea', 104: 'shelf', 105: 'snow', 106: 'stairs', 107: 'tent', 108: 'towel', 109: 'wall-brick', 
110: 'wall-stone', 111: 'wall-tile', 112: 'wall-wood', 113: 'water-other', 114: 'window-blind', 115: 'window-other', 116: 'tree-merged', 117: 'fence-merged', 118: 'ceiling-merged', 119: 'sky-other-merged', 
120: 'cabinet-merged', 121: 'table-merged', 122: 'floor-other-merged', 123: 'pavement-merged', 124: 'mountain-merged', 125: 'grass-merged', 126: 'dirt-merged', 127: 'paper-merged', 128: 'food-other-merged', 
129: 'building-other-merged', 130: 'rock-merged', 131: 'wall-other-merged', 132: 'rug-merged'}
>>> model.config.id2label[123]
'pavement-merged'
>>> results["segments_info"][1]
{'id': 2, 'label_id': 123, 'was_fused': False, 'score': 0.995813}
""" 
# The labels above don't seem to correspond to anything in the COCO-Stuff label list: https://github.com/nightrome/cocostuff/blob/master/labels.md
# This one was closest to helping: https://github.com/NielsRogge/Transformers-Tutorials/blob/master/MaskFormer/Inference/Inference_with_MaskFormer_for_semantic_%2B_panoptic_segmentation.ipynb

"""
>>> Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8))
<PIL.Image.Image image mode=L size=2000x1500 at 0x7F07773691C0>
>>> temp = Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8))
"""

"""
>>> mask = (results["segmentation"].cpu().numpy == 4)
>>> mask = (results["segmentation"].cpu().numpy() == 4)
>>> mask
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
>>> visual_mask = (mask * 255).astype(np.uint8)
>>> visual_mask = Image.fromarray(visual_mask)
>>> plt.imshow(visual_mask)
<matplotlib.image.AxesImage object at 0x7f0761e78040>
>>> plt.show()
"""

"""
>>> mask = (results["segmentation"].cpu().numpy() == 1)
>>> visual_mask = (mask*255).astype(np.uint8)
>>> visual_mask = Image.fromarray(visual_mask)
>>> plt.imshow(visual_mask)
<matplotlib.image.AxesImage object at 0x7f0760298550>
>>> plt.show()
>>> results["segments_info"][0]
{'id': 1, 'label_id': 25, 'was_fused': False, 'score': 0.998022}
>>> 
"""

"""
>>> np.where(mask==True)
(array([300, 300, 300, ..., 392, 392, 392]), array([452, 453, 454, ..., 473, 474, 475]))
>>> max(np.where(mask==True)[0])
392
>>> min(np.where(mask==True)[0])
300
>>> max(np.where(mask==True)[1])
538
>>> min(np.where(mask==True)[1])
399
"""


def contour_map(map_to_use, label_id):
    """
    Find the contours of the region whose pixels equal `label_id`.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to select in the map.

    Returns the `(contours, hierarchy)` pair produced by `cv.findContours`,
    called with `cv.RETR_TREE` and `cv.CHAIN_APPROX_SIMPLE`.
    """
    # Copy to host memory only when CUDA is available on this machine.
    host_map = map_to_use.cpu() if torch.cuda.is_available() else map_to_use

    # findContours is given an 8-bit image: 255 inside the region, 0 outside.
    binary = ((host_map.numpy() == label_id) * 255).astype(np.uint8)
    found = cv.findContours(binary, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    contours, hierarchy = found
    return contours, hierarchy

"""
>>> mask = (results["segmentation"].cpu().numpy() == 1)
>>> visual_mask = (mask* 255).astype(np.uint8)
>>> import cv2 as cv
>>> contours, hierarchy = cv.findContours(visual_mask, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
>>> contours.shape
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'tuple' object has no attribute 'shape'
>>> contours[0].shape
(7, 1, 2)
>>> shrunk = contours[0][:, 0, :]
>>> shrunk
array([[400, 340],
       [399, 341],
       [400, 342],
       [401, 342],
       [402, 341],
       [403, 341],
       [402, 340]], dtype=int32)
>>> get_coordinates_for_bb_simple(results["segmentation"], 1)
((300, 399), (392, 538))
>>> shrunk = contours[1][:, 0, :]
>>> max(shrunk[:, 0])
538
>>> min(shrunk[:, 0])
409
>>> min(shrunk[:, 1])
300
>>> max(shrunk[:, 1])
392
>>> 
"""



"""
import cv2 as cv
contours, hierarchy = cv.findContours(visual_mask, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
shrunk = contours[0][:, 0, :]

>>> shrunk[0, :]
array([1907,  887], dtype=int32)
>>> shrunk[:, 0]
array([1907, 1907, 1908, 1908, 1908], dtype=int32)
>>> shrunk[:, 1]
array([887, 888, 889, 890, 888], dtype=int32)
>>> shrunk
array([[1907,  887],
       [1907,  888],
       [1908,  889],
       [1908,  890],
       [1908,  888]], dtype=int32)
"""