from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
from PIL import Image, ImageDraw
import numpy as np
import torch
from torch import nn
import gradio as gr
import os
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")
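# Note: recent transformers releases expose SegformerImageProcessor as the
# successor to SegformerFeatureExtractor; the older name still works here.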
# https://github.com/NielsRogge/Transformers-Tutorials/blob/master/SegFormer/Segformer_inference_notebook.ipynb
def cityscapes_palette():
    """Cityscapes palette for external use."""
    return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156],
            [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0],
            [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60],
            [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100],
            [0, 0, 230], [119, 11, 32]]
def cityscapes_classes():
    """Cityscapes class names for external use."""
    return [
        'road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
        'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',
        'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
        'bicycle'
    ]
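# Convenience constant for external use (an assumption for readability: the
# palette above follows the standard Cityscapes label order, so 'traffic light'
# is label 6, the index filtered on in call() below).
TRAFFIC_LIGHT_LABEL = cityscapes_classes().index('traffic light')  # == 6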
def annotation(image: Image.Image, color_seg: np.ndarray):
    """Draw a rectangle around each sub-square whose mask area exceeds a threshold."""
    assert image.size == (1024, 1024)
    assert color_seg.shape == (1024, 1024, 3)
    blocks = 4       # 4x4 sub grid
    step_size = 256  # sub square edge size
    draw = ImageDraw.Draw(image)
    sub_square_xy = [(x, y) for x in range(0, blocks * step_size, step_size)
                            for y in range(0, blocks * step_size, step_size)]
    # Collapse the RGB channels into a single 1024 x 1024 intensity map,
    # so a sub-square's sum measures how much mask it contains.
    reduced_seg = color_seg.sum(axis=2)
    for (x, y) in sub_square_xy:
        sub_square_seg = reduced_seg[y:y + step_size, x:x + step_size]
        # 100000 is an empirical threshold on the summed pixel intensities.
        if sub_square_seg.sum() > 100000:
            print("light found at square ", x, y)
            draw.rectangle([(x, y), (x + step_size, y + step_size)], outline=128, width=3)
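# A minimal standalone sketch of annotation() with hypothetical inputs:
#   mask = np.zeros((1024, 1024, 3), dtype=np.uint8)
#   mask[0:256, 0:256] = [250, 170, 30]   # paint the top-left sub-square traffic-light orange
#   canvas = Image.new("RGB", (1024, 1024))
#   annotation(canvas, mask)              # outlines the top-left square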
def call(image):  # image arrives as a numpy array from Gradio
    resized = Image.fromarray(image).resize((1024, 1024))
    resized_image = np.array(resized)
    print(f"{resized_image.shape=}")  # (1024, 1024, 3)
    inputs = feature_extractor(images=resized_image, return_tensors="pt")
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)
    print(f"{outputs.logits.shape=}")  # (batch_size, num_labels, height/4, width/4) -> 1, 19, 256, 256
    # First, rescale logits to the resized input size
    interpolated_logits = nn.functional.interpolate(
        outputs.logits,
        size=[1024, 1024],  # (height, width)
        mode='bilinear',
        align_corners=False)
    print(f"{interpolated_logits.shape=}, {outputs.logits.shape=}")  # 1, 19, 1024, 1024
    # Second, apply argmax on the class dimension
    seg = interpolated_logits.argmax(dim=1)[0]
    print(f"{seg.shape=}")
    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)  # height, width, 3
    print(f"{color_seg.shape=}")
    for label, color in enumerate(cityscapes_palette()):
        if label == 6:  # keep only class 6, 'traffic light'
            color_seg[seg == label, :] = color
    # Convert to BGR
    color_seg = color_seg[..., ::-1]
    print(f"{color_seg.shape=}")
    # Blend the image with the mask
    img = resized_image * 0.5 + color_seg * 0.5
    img = img.astype(np.uint8)
    out_im_file = Image.fromarray(img)
    annotation(out_im_file, color_seg)
    return out_im_file
# Local smoke test:
# original_image = Image.open("./examples/1.jpg")
# print(f"{np.array(original_image).shape=}")  # e.g. (729, 1000, 3)
# out = call(np.array(original_image))  # call() expects a numpy array
# out.save("out2.jpeg")
title = "Traffic Light Detector"
description = "Experiment traffic light detection to evaluate the value of captcha security controls"
iface = gr.Interface(fn=call,
                     inputs="image",
                     outputs="image",
                     title=title,
                     description=description,
                     examples=[
                         os.path.join(os.path.dirname(__file__), "examples/1.jpg"),
                         os.path.join(os.path.dirname(__file__), "examples/2.jpg"),
                         os.path.join(os.path.dirname(__file__), "examples/3.jpg"),
                         os.path.join(os.path.dirname(__file__), "examples/4.jpg"),
                         os.path.join(os.path.dirname(__file__), "examples/5.jpg"),
                         os.path.join(os.path.dirname(__file__), "examples/6.jpg"),
                     ],
                     thumbnail="thumbnail.webp")
iface.launch()