Commit
·
bc59155
1
Parent(s):
bfd3318
Upload 4 files
Browse files- Dockerfile.txt +12 -0
- function.yaml +36 -0
- main.py +35 -0
- model_handler.py +233 -0
Dockerfile.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Stage 1 ("build"): download the text-detection-0004 model (FP32 precision)
# into /opt/nuclio/open_model_zoo using the Open Model Zoo downloader.
FROM openvino/ubuntu20_dev:2022.3.0 AS build

USER root

RUN omz_downloader \
    --name text-detection-0004 \
    --precisions FP32 \
    -o /opt/nuclio/open_model_zoo

# Stage 2: start from the cvat.openvino.base image and copy in only the
# downloaded model directory from the build stage.
FROM cvat.openvino.base

COPY --from=build --chown=root:root /opt/nuclio/open_model_zoo /opt/nuclio/open_model_zoo
|
function.yaml
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Nuclio function configuration for the text-detection-0004 detector.
metadata:
  name: openvino-omz-intel-text-detection-0004
  namespace: cvat
  annotations:
    name: Text detection v4
    type: detector
    framework: openvino
    # JSON list of labels; main.py's init_context parses this string and
    # builds an {id: name} mapping from it.
    spec: |
      [
        { "id": 1, "name": "text" }
      ]

spec:
  description: Text detector based on PixelLink architecture with MobileNetV2-like as a backbone for indoor/outdoor scenes.
  runtime: 'python:3.8'
  # <module>:<function> entry point, i.e. handler() in main.py.
  handler: main:handler
  eventTimeout: 30s

  build:
    image: cvat.openvino.omz.intel.text-detection-0004
    baseImage: cvat.openvino.omz.intel.text-detection-0004.base

  triggers:
    myHttpTrigger:
      maxWorkers: 2
      kind: 'http'
      workerAvailabilityTimeoutMilliseconds: 10000
      attributes:
        maxRequestBodySize: 33554432 # 32MB

  platform:
    attributes:
      restartPolicy:
        name: always
        maximumRetryCount: 3
      mountMode: volume
|
main.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import base64
|
3 |
+
from PIL import Image
|
4 |
+
import io
|
5 |
+
from model_handler import ModelHandler
|
6 |
+
import yaml
|
7 |
+
|
8 |
+
def init_context(context):
    """Prepare the function context: load the label map and the model.

    Reads the ``metadata.annotations.spec`` JSON string from
    ``/opt/nuclio/function.yaml``, turns it into an ``{id: name}`` dict,
    builds a ``ModelHandler`` with it and stores the handler on
    ``context.user_data.model``.
    """
    context.logger.info("Init context... 0%")

    # The label list is embedded as a JSON string inside the function config.
    with open("/opt/nuclio/function.yaml", 'rb') as config_stream:
        config = yaml.safe_load(config_stream)

    labels = {}
    for entry in json.loads(config['metadata']['annotations']['spec']):
        labels[entry['id']] = entry['name']

    # Build the model wrapper once and keep it for later handler() calls.
    context.user_data.model = ModelHandler(labels)

    context.logger.info("Init context...100%")
|
22 |
+
|
23 |
+
def handler(context, event):
    """Run text detection on the image carried by ``event.body``.

    ``event.body`` is read as a mapping with a base64-encoded ``"image"``
    entry and optional ``"pixel_threshold"`` / ``"link_threshold"`` entries
    (both default to 0.8). The model results are returned JSON-encoded in a
    200 response.
    """
    context.logger.info("Run text-detection-0004 model")

    payload = event.body
    raw_image = base64.b64decode(payload["image"])
    image_stream = io.BytesIO(raw_image)
    pixel_threshold = float(payload.get("pixel_threshold", 0.8))
    link_threshold = float(payload.get("link_threshold", 0.8))
    image = Image.open(image_stream)

    detector = context.user_data.model
    detections = detector.infer(image, pixel_threshold, link_threshold)

    return context.Response(
        body=json.dumps(detections),
        headers={},
        content_type='application/json',
        status_code=200,
    )
|
model_handler.py
ADDED
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (C) 2020-2022 Intel Corporation
|
2 |
+
# Copyright (C) 2022 CVAT.ai Corporation
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: MIT
|
5 |
+
|
6 |
+
import os
|
7 |
+
import cv2
|
8 |
+
import numpy as np
|
9 |
+
from model_loader import ModelLoader
|
10 |
+
from shared import to_cvat_mask
|
11 |
+
|
12 |
+
|
13 |
+
class PixelLinkDecoder:
    """Turn PixelLink pixel/link logits into ordered 4-point text boxes.

    Pixels whose text score passes ``pixel_threshold`` are grouped with a
    union-find structure: two positive pixels are joined when the link score
    from the first towards the second passes ``link_threshold``. Each group
    is then converted to a minimum-area rectangle in image coordinates.

    After ``decode()`` the results are available as:
      * ``bboxes``     - list of (4, 2) integer arrays ordered TL, TR, BR, BL
      * ``pixel_mask`` - boolean map of positive pixels (network resolution)
      * ``mask``       - int32 map with a 1-based group index per pixel
    """

    def __init__(self, pixel_threshold, link_threshold):
        """Store the two confidence thresholds and pick the neighbourhood."""
        # 8-connectivity is hard-wired; flip this flag to use 4-connectivity.
        four_neighbours = False
        if four_neighbours:
            self._get_neighbours = self._get_neighbours_4
        else:
            self._get_neighbours = self._get_neighbours_8
        self.pixel_conf_threshold = pixel_threshold
        self.link_conf_threshold = link_threshold

    def decode(self, height, width, detections: dict):
        """Decode raw network outputs for an image of size ``width`` x ``height``.

        ``detections`` must contain the 'model/segm_logits/add' and
        'model/link_logits_/add' outputs. Results are stored on the instance
        (see the class docstring); nothing is returned.
        """
        self.image_height = height
        self.image_width = width
        self.pixel_scores = self._set_pixel_scores(detections['model/segm_logits/add'])
        self.link_scores = self._set_link_scores(detections['model/link_logits_/add'])

        self.pixel_mask = self.pixel_scores >= self.pixel_conf_threshold
        self.link_mask = self.link_scores >= self.link_conf_threshold
        # (row, col) coordinates of every positive pixel.
        self.points = list(zip(*np.where(self.pixel_mask)))
        self.h, self.w = np.shape(self.pixel_mask)
        # Union-find parent table; -1 marks a point that is its own root.
        self.group_mask = dict.fromkeys(self.points, -1)
        self.bboxes = None
        self.root_map = None
        self.mask = None

        self._decode()

    def _softmax(self, x, axis=None):
        """Softmax along ``axis``, computed via log-sum-exp."""
        return np.exp(x - self._logsumexp(x, axis=axis, keepdims=True))

    # pylint: disable=no-self-use
    def _logsumexp(self, a, axis=None, b=None, keepdims=False, return_sign=False):
        """Compute ``log(sum(b * exp(a)))`` along ``axis``.

        The maximum of ``a`` is subtracted before exponentiation and added
        back afterwards. ``b`` is an optional scaling factor; entries of
        ``a`` where ``b == 0`` are excluded. With ``return_sign=True`` a
        ``(result, sign)`` pair is returned.
        """
        if b is not None:
            a, b = np.broadcast_arrays(a, b)
            if np.any(b == 0):
                a = a + 0.  # promote to at least float
                a[b == 0] = -np.inf

        a_max = np.amax(a, axis=axis, keepdims=True)

        # A non-finite maximum would poison the subtraction below; use 0.
        if a_max.ndim > 0:
            a_max[~np.isfinite(a_max)] = 0
        elif not np.isfinite(a_max):
            a_max = 0

        if b is not None:
            b = np.asarray(b)
            tmp = b * np.exp(a - a_max)
        else:
            tmp = np.exp(a - a_max)

        # suppress warnings about log of zero
        with np.errstate(divide='ignore'):
            s = np.sum(tmp, axis=axis, keepdims=keepdims)
            if return_sign:
                sgn = np.sign(s)
                s *= sgn  # /= makes more sense but we need zero -> zero
            out = np.log(s)

        if not keepdims:
            a_max = np.squeeze(a_max, axis=axis)
        out += a_max

        if return_sign:
            return out, sgn
        else:
            return out

    def _set_pixel_scores(self, pixel_scores):
        """Return the softmaxed class-1 pixel score map of the first item.

        The 4-D input is transposed so that axis 1 becomes the last axis
        (presumably NCHW -> NHWC - confirm against the model), softmaxed
        over that axis, and entry 1 of batch item 0 is returned.
        """
        tmp = np.transpose(pixel_scores, (0, 2, 3, 1))
        return self._softmax(tmp, axis=-1)[0, :, :, 1]

    def _set_link_scores(self, link_scores):
        """Return the softmaxed link scores of the first item, one per neighbour.

        After the same transpose as for pixel scores, the last axis is
        reshaped to (8, 2): eight neighbour links with two logits each.
        Entry 1 of each softmaxed pair is kept, giving an array with a
        trailing axis of 8.
        """
        tmp = np.transpose(link_scores, (0, 2, 3, 1))
        tmp_reshaped = tmp.reshape(tmp.shape[:-1] + (8, 2))
        return self._softmax(tmp_reshaped, axis=-1)[0, :, :, :, 1]

    def _find_root(self, point):
        """Return the union-find root of ``point``.

        When at least one parent hop was needed, ``point`` is re-pointed
        directly at the root to shorten later lookups.
        """
        root = point
        update_parent = False
        tmp = self.group_mask[root]
        # Compare by value: parents are coordinate tuples, -1 marks a root.
        # (Identity comparison against an int literal is unreliable.)
        while tmp != -1:
            root = tmp
            tmp = self.group_mask[root]
            update_parent = True
        if update_parent:
            self.group_mask[point] = root
        return root

    def _join(self, p1, p2):
        """Merge the groups of ``p1`` and ``p2`` (root of p2 goes under root of p1)."""
        root1 = self._find_root(p1)
        root2 = self._find_root(p2)
        if root1 != root2:
            self.group_mask[root2] = root1

    def _get_index(self, root):
        """Return the 1-based group index for ``root``, allocating it if new."""
        if root not in self.root_map:
            self.root_map[root] = len(self.root_map) + 1
        return self.root_map[root]

    def _get_all(self):
        """Fill ``self.mask`` with the group index of every positive pixel."""
        self.root_map = {}
        self.mask = np.zeros_like(self.pixel_mask, dtype=np.int32)

        for point in self.points:
            point_root = self._find_root(point)
            bbox_idx = self._get_index(point_root)
            self.mask[point] = bbox_idx

    def _get_neighbours_8(self, x, y):
        """Return in-bounds 8-neighbours of (x, y) as (link_index, nx, ny)."""
        w, h = self.w, self.h
        tmp = [(0, x - 1, y - 1), (1, x, y - 1),
               (2, x + 1, y - 1), (3, x - 1, y),
               (4, x + 1, y), (5, x - 1, y + 1),
               (6, x, y + 1), (7, x + 1, y + 1)]

        return [i for i in tmp if i[1] >= 0 and i[1] < w and i[2] >= 0 and i[2] < h]

    def _get_neighbours_4(self, x, y):
        """Return in-bounds 4-neighbours of (x, y) as (link_index, nx, ny).

        Link indices match those used by the 8-neighbour layout.
        """
        w, h = self.w, self.h
        tmp = [(1, x, y - 1),
               (3, x - 1, y),
               (4, x + 1, y),
               (6, x, y + 1)]

        return [i for i in tmp if i[1] >= 0 and i[1] < w and i[2] >= 0 and i[2] < h]

    def _mask_to_bboxes(self, min_area=300, min_height=10):
        """Convert the group-index mask into ``self.bboxes``.

        The mask is resized to the image size (nearest neighbour, so indices
        are preserved). For each group, the first contour found is fitted
        with a minimum-area rectangle; rectangles whose shorter side is below
        ``min_height`` or whose area is below ``min_area`` are dropped.
        """
        self.bboxes = []
        max_bbox_idx = self.mask.max()
        mask_tmp = cv2.resize(self.mask, (self.image_width, self.image_height), interpolation=cv2.INTER_NEAREST)

        for bbox_idx in range(1, max_bbox_idx + 1):
            bbox_mask = mask_tmp == bbox_idx
            cnts, _ = cv2.findContours(bbox_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            if len(cnts) == 0:
                continue
            cnt = cnts[0]
            rect, w, h = self._min_area_rect(cnt)
            if min(w, h) < min_height:
                continue
            if w * h < min_area:
                continue
            self.bboxes.append(self._order_points(rect))

    # pylint: disable=no-self-use
    def _min_area_rect(self, cnt):
        """Return (corner points as an integer array, width, height) for ``cnt``."""
        rect = cv2.minAreaRect(cnt)
        w, h = rect[1]
        box = cv2.boxPoints(rect)
        # np.int0 was an alias of np.intp and no longer exists in NumPy 2.x.
        box = box.astype(np.intp)
        return box, w, h

    # pylint: disable=no-self-use
    def _order_points(self, rect):
        """Reorder four (x, y) corners as TL, TR, BR, BL.

        TL / BR are the corners with the smallest / largest x + y;
        TR / BL are the corners with the smallest / largest y - x.
        """
        tmp = np.zeros_like(rect)
        sums = rect.sum(axis=1)
        tmp[0] = rect[np.argmin(sums)]
        tmp[2] = rect[np.argmax(sums)]
        diff = np.diff(rect, axis=1)
        tmp[1] = rect[np.argmin(diff)]
        tmp[3] = rect[np.argmax(diff)]
        return tmp

    def _decode(self):
        """Link positive pixels into groups, then derive the mask and boxes."""
        for point in self.points:
            y, x = point
            neighbours = self._get_neighbours(x, y)
            for n_idx, nx, ny in neighbours:
                # Join only when the link from this pixel towards the
                # neighbour is positive AND the neighbour is a text pixel.
                link_value = self.link_mask[y, x, n_idx]
                pixel_cls = self.pixel_mask[ny, nx]
                if link_value and pixel_cls:
                    self._join(point, (ny, nx))

        self._get_all()
        self._mask_to_bboxes()
|
194 |
+
|
195 |
+
class ModelHandler:
    """Wrap the text-detection-0004 model and convert its output to results."""

    def __init__(self, labels):
        """Load the model files and remember the ``{id: name}`` label map.

        The model directory is taken from the ``MODEL_PATH`` environment
        variable, falling back to the Open Model Zoo FP32 directory under
        /opt/nuclio.
        """
        base_dir = os.path.abspath(os.environ.get("MODEL_PATH",
            "/opt/nuclio/open_model_zoo/intel/text-detection-0004/FP32"))
        model_xml = os.path.join(base_dir, "text-detection-0004.xml")
        model_bin = os.path.join(base_dir, "text-detection-0004.bin")
        self.model = ModelLoader(model_xml, model_bin)
        self.labels = labels

    def infer(self, image, pixel_threshold, link_threshold):
        """Detect text regions in ``image``.

        ``image`` must expose ``width`` and ``height`` and be accepted by the
        model loader's ``infer``. Returns a list with one dict per detected
        box, with keys "confidence" (always None), "label", "points" (the
        flattened 4-corner box), "mask" (the output of ``to_cvat_mask`` for
        the box's axis-aligned bounds) and "type" ("mask").
        """
        output_layer = self.model.infer(image)

        obj_class = 1
        pcd = PixelLinkDecoder(pixel_threshold, link_threshold)
        pcd.decode(image.height, image.width, output_layer)

        if not pcd.bboxes:
            return []

        # The mask is the whole thresholded pixel map scaled to image size and
        # stretched to 0..255. It is the same for every box, so build it once.
        # NOTE(review): assumes to_cvat_mask does not modify `mask` - confirm.
        mask = np.array(pcd.pixel_mask, dtype=np.uint8)
        mask = cv2.resize(mask, dsize=(image.width, image.height), interpolation=cv2.INTER_CUBIC)
        cv2.normalize(mask, mask, 0, 255, cv2.NORM_MINMAX)

        label = self.labels.get(obj_class, "unknown")

        results = []
        for box in pcd.bboxes:
            # Flatten to [x0, y0, x1, y1, ...]; even slots are x, odd are y.
            box = box.ravel().tolist()
            x_min = min(box[::2])
            x_max = max(box[::2])
            y_min = min(box[1::2])
            y_max = max(box[1::2])
            cvat_mask = to_cvat_mask((x_min, y_min, x_max, y_max), mask)

            results.append({
                "confidence": None,
                "label": label,
                "points": box,
                "mask": cvat_mask,
                "type": "mask",
            })

        return results
|